from test import support
from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
                      STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
                      open as tokenize_open, Untokenizer)
from io import BytesIO
from unittest import TestCase, mock
import os
import token


class TokenizeTest(TestCase):
    # Tests for the tokenize module.

    # The tests can be really simple. Given a small fragment of source
    # code, print out a table with tokens. The ENDMARKER is omitted for
    # brevity.
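    # Each expected row pairs a token type name with the token's repr and
    # its (row, col) start and end positions, mirroring the TokenInfo
    # 5-tuples that tokenize() yields.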

    def check_tokenize(self, s, expected):
        # Format the tokens in s in a table format.
        # The ENDMARKER is omitted.
        result = []
        f = BytesIO(s.encode('utf-8'))
        for type, token, start, end, line in tokenize(f.readline):
            if type == ENDMARKER:
                break
            type = tok_name[type]
            result.append("    %(type)-10.10s %(token)-13.13r %(start)s %(end)s" %
                          locals())
        self.assertEqual(result,
                         ["    ENCODING   'utf-8'       (0, 0) (0, 0)"] +
                         expected.rstrip().splitlines())

    def test_basic(self):
        self.check_tokenize("1 + 1", """\
    NUMBER     '1'           (1, 0) (1, 1)
    OP         '+'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    """)
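        # NEWLINE ends a logical line of code, while NL marks a line break
        # that does not end one (here, the comment-only line).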
        self.check_tokenize("if False:\n"
                            "    # NL\n"
                            "    True = False # NEWLINE\n", """\
    NAME       'if'          (1, 0) (1, 2)
    NAME       'False'       (1, 3) (1, 8)
    OP         ':'           (1, 8) (1, 9)
    NEWLINE    '\\n'          (1, 9) (1, 10)
    COMMENT    '# NL'        (2, 4) (2, 8)
    NL         '\\n'          (2, 8) (2, 9)
    INDENT     '    '        (3, 0) (3, 4)
    NAME       'True'        (3, 4) (3, 8)
    OP         '='           (3, 9) (3, 10)
    NAME       'False'       (3, 11) (3, 16)
    COMMENT    '# NEWLINE'   (3, 17) (3, 26)
    NEWLINE    '\\n'          (3, 26) (3, 27)
    DEDENT     ''            (4, 0) (4, 0)
    """)
        indent_error_file = b"""\
def k(x):
    x += 2
  x += 5
"""
        readline = BytesIO(indent_error_file).readline
        with self.assertRaisesRegex(IndentationError,
                                    "unindent does not match any "
                                    "outer indentation level"):
            for tok in tokenize(readline):
                pass

    def test_int(self):
        # Ordinary integers and binary operators
        self.check_tokenize("0xff <= 255", """\
    NUMBER     '0xff'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    """)
        self.check_tokenize("0b10 <= 255", """\
    NUMBER     '0b10'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    """)
        self.check_tokenize("0o123 <= 0O123", """\
    NUMBER     '0o123'       (1, 0) (1, 5)
    OP         '<='          (1, 6) (1, 8)
    NUMBER     '0O123'       (1, 9) (1, 14)
    """)
        self.check_tokenize("1234567 > ~0x15", """\
    NUMBER     '1234567'     (1, 0) (1, 7)
    OP         '>'           (1, 8) (1, 9)
    OP         '~'           (1, 10) (1, 11)
    NUMBER     '0x15'        (1, 11) (1, 15)
    """)
        self.check_tokenize("2134568 != 1231515", """\
    NUMBER     '2134568'     (1, 0) (1, 7)
    OP         '!='          (1, 8) (1, 10)
    NUMBER     '1231515'     (1, 11) (1, 18)
    """)
        self.check_tokenize("(-124561-1) & 200000000", """\
    OP         '('           (1, 0) (1, 1)
    OP         '-'           (1, 1) (1, 2)
    NUMBER     '124561'      (1, 2) (1, 8)
    OP         '-'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         ')'           (1, 10) (1, 11)
    OP         '&'           (1, 12) (1, 13)
    NUMBER     '200000000'   (1, 14) (1, 23)
    """)
        self.check_tokenize("0xdeadbeef != -1", """\
    NUMBER     '0xdeadbeef'  (1, 0) (1, 10)
    OP         '!='          (1, 11) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    NUMBER     '1'           (1, 15) (1, 16)
    """)
        self.check_tokenize("0xdeadc0de & 12345", """\
    NUMBER     '0xdeadc0de'  (1, 0) (1, 10)
    OP         '&'           (1, 11) (1, 12)
    NUMBER     '12345'       (1, 13) (1, 18)
    """)
        self.check_tokenize("0xFF & 0x15 | 1234", """\
    NUMBER     '0xFF'        (1, 0) (1, 4)
    OP         '&'           (1, 5) (1, 6)
    NUMBER     '0x15'        (1, 7) (1, 11)
    OP         '|'           (1, 12) (1, 13)
    NUMBER     '1234'        (1, 14) (1, 18)
    """)

    def test_long(self):
        # Long integers
        self.check_tokenize("x = 0", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0'           (1, 4) (1, 5)
    """)
        self.check_tokenize("x = 0xfffffffffff", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0xffffffffff (1, 4) (1, 17)
    """)
        self.check_tokenize("x = 123141242151251616110", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '123141242151 (1, 4) (1, 25)
    """)
        self.check_tokenize("x = -15921590215012591", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    OP         '-'           (1, 4) (1, 5)
    NUMBER     '159215902150 (1, 5) (1, 22)
    """)

    def test_float(self):
        # Floating point numbers
        self.check_tokenize("x = 3.14159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 314159.", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '314159.'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = .314159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '.314159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 3e14159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3e14159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 3E123", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3E123'       (1, 4) (1, 9)
    """)
        self.check_tokenize("x+y = 3e-1230", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '+'           (1, 1) (1, 2)
    NAME       'y'           (1, 2) (1, 3)
    OP         '='           (1, 4) (1, 5)
    NUMBER     '3e-1230'     (1, 6) (1, 13)
    """)
        self.check_tokenize("x = 3.14e159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14e159'    (1, 4) (1, 12)
    """)

    def test_string(self):
        # String literals
        self.check_tokenize("x = ''; y = \"\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "''"          (1, 4) (1, 6)
    OP         ';'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '='           (1, 10) (1, 11)
    STRING     '""'          (1, 12) (1, 14)
    """)
        self.check_tokenize("x = '\"'; y = \"'\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '\\'"\\''       (1, 4) (1, 7)
    OP         ';'           (1, 7) (1, 8)
    NAME       'y'           (1, 9) (1, 10)
    OP         '='           (1, 11) (1, 12)
    STRING     '"\\'"'        (1, 13) (1, 16)
    """)
        self.check_tokenize("x = \"doesn't \"shrink\", does it\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"doesn\\'t "' (1, 4) (1, 14)
    NAME       'shrink'      (1, 14) (1, 20)
    STRING     '", does it"' (1, 20) (1, 31)
    """)
        self.check_tokenize("x = 'abc' + 'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "'abc'"       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    STRING     "'ABC'"       (1, 12) (1, 17)
    """)
        self.check_tokenize('y = "ABC" + "ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"ABC"'       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    STRING     '"ABC"'       (1, 12) (1, 17)
    """)
        self.check_tokenize("x = r'abc' + r'ABC' + R'ABC' + R'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "r'abc'"      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     "r'ABC'"      (1, 13) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    STRING     "R'ABC'"      (1, 22) (1, 28)
    OP         '+'           (1, 29) (1, 30)
    STRING     "R'ABC'"      (1, 31) (1, 37)
    """)
        self.check_tokenize('y = r"abc" + r"ABC" + R"ABC" + R"ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     'r"abc"'      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     'r"ABC"'      (1, 13) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    STRING     'R"ABC"'      (1, 22) (1, 28)
    OP         '+'           (1, 29) (1, 30)
    STRING     'R"ABC"'      (1, 31) (1, 37)
    """)

        self.check_tokenize("u'abc' + U'abc'", """\
    STRING     "u'abc'"      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     "U'abc'"      (1, 9) (1, 15)
    """)
        self.check_tokenize('u"abc" + U"abc"', """\
    STRING     'u"abc"'      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     'U"abc"'      (1, 9) (1, 15)
    """)

        self.check_tokenize("b'abc' + B'abc'", """\
    STRING     "b'abc'"      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     "B'abc'"      (1, 9) (1, 15)
    """)
        self.check_tokenize('b"abc" + B"abc"', """\
    STRING     'b"abc"'      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     'B"abc"'      (1, 9) (1, 15)
    """)
        self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\
    STRING     "br'abc'"     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     "bR'abc'"     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     "Br'abc'"     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     "BR'abc'"     (1, 30) (1, 37)
    """)
        self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\
    STRING     'br"abc"'     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     'bR"abc"'     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     'Br"abc"'     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     'BR"abc"'     (1, 30) (1, 37)
    """)
        self.check_tokenize("rb'abc' + rB'abc' + Rb'abc' + RB'abc'", """\
    STRING     "rb'abc'"     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     "rB'abc'"     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     "Rb'abc'"     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     "RB'abc'"     (1, 30) (1, 37)
    """)
        self.check_tokenize('rb"abc" + rB"abc" + Rb"abc" + RB"abc"', """\
    STRING     'rb"abc"'     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     'rB"abc"'     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     'Rb"abc"'     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     'RB"abc"'     (1, 30) (1, 37)
    """)

    def test_function(self):
        self.check_tokenize("def d22(a, b, c=2, d=2, *k): pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd22'         (1, 4) (1, 7)
    OP         '('           (1, 7) (1, 8)
    NAME       'a'           (1, 8) (1, 9)
    OP         ','           (1, 9) (1, 10)
    NAME       'b'           (1, 11) (1, 12)
    OP         ','           (1, 12) (1, 13)
    NAME       'c'           (1, 14) (1, 15)
    OP         '='           (1, 15) (1, 16)
    NUMBER     '2'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    NAME       'd'           (1, 19) (1, 20)
    OP         '='           (1, 20) (1, 21)
    NUMBER     '2'           (1, 21) (1, 22)
    OP         ','           (1, 22) (1, 23)
    OP         '*'           (1, 24) (1, 25)
    NAME       'k'           (1, 25) (1, 26)
    OP         ')'           (1, 26) (1, 27)
    OP         ':'           (1, 27) (1, 28)
    NAME       'pass'        (1, 29) (1, 33)
    """)
        self.check_tokenize("def d01v_(a=1, *k, **w): pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd01v_'       (1, 4) (1, 9)
    OP         '('           (1, 9) (1, 10)
    NAME       'a'           (1, 10) (1, 11)
    OP         '='           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         ','           (1, 13) (1, 14)
    OP         '*'           (1, 15) (1, 16)
    NAME       'k'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    OP         '**'          (1, 19) (1, 21)
    NAME       'w'           (1, 21) (1, 22)
    OP         ')'           (1, 22) (1, 23)
    OP         ':'           (1, 23) (1, 24)
    NAME       'pass'        (1, 25) (1, 29)
    """)

    def test_comparison(self):
        # Comparison
        self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != "
                            "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\
    NAME       'if'          (1, 0) (1, 2)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '<'           (1, 5) (1, 6)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '>'           (1, 9) (1, 10)
    NUMBER     '1'           (1, 11) (1, 12)
    OP         '=='          (1, 13) (1, 15)
    NUMBER     '1'           (1, 16) (1, 17)
    OP         '>='          (1, 18) (1, 20)
    NUMBER     '5'           (1, 21) (1, 22)
    OP         '<='          (1, 23) (1, 25)
    NUMBER     '0x15'        (1, 26) (1, 30)
    OP         '<='          (1, 31) (1, 33)
    NUMBER     '0x12'        (1, 34) (1, 38)
    OP         '!='          (1, 39) (1, 41)
    NUMBER     '1'           (1, 42) (1, 43)
    NAME       'and'         (1, 44) (1, 47)
    NUMBER     '5'           (1, 48) (1, 49)
    NAME       'in'          (1, 50) (1, 52)
    NUMBER     '1'           (1, 53) (1, 54)
    NAME       'not'         (1, 55) (1, 58)
    NAME       'in'          (1, 59) (1, 61)
    NUMBER     '1'           (1, 62) (1, 63)
    NAME       'is'          (1, 64) (1, 66)
    NUMBER     '1'           (1, 67) (1, 68)
    NAME       'or'          (1, 69) (1, 71)
    NUMBER     '5'           (1, 72) (1, 73)
    NAME       'is'          (1, 74) (1, 76)
    NAME       'not'         (1, 77) (1, 80)
    NUMBER     '1'           (1, 81) (1, 82)
    OP         ':'           (1, 82) (1, 83)
    NAME       'pass'        (1, 84) (1, 88)
    """)

    def test_shift(self):
        # Shift
        self.check_tokenize("x = 1 << 1 >> 5", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '<<'          (1, 6) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '>>'          (1, 11) (1, 13)
    NUMBER     '5'           (1, 14) (1, 15)
    """)

    def test_additive(self):
        # Additive
        self.check_tokenize("x = 1 - y + 15 - 1 + 0x124 + z + a[5]", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '-'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    NUMBER     '15'          (1, 12) (1, 14)
    OP         '-'           (1, 15) (1, 16)
    NUMBER     '1'           (1, 17) (1, 18)
    OP         '+'           (1, 19) (1, 20)
    NUMBER     '0x124'       (1, 21) (1, 26)
    OP         '+'           (1, 27) (1, 28)
    NAME       'z'           (1, 29) (1, 30)
    OP         '+'           (1, 31) (1, 32)
    NAME       'a'           (1, 33) (1, 34)
    OP         '['           (1, 34) (1, 35)
    NUMBER     '5'           (1, 35) (1, 36)
    OP         ']'           (1, 36) (1, 37)
    """)

    def test_multiplicative(self):
        # Multiplicative
        self.check_tokenize("x = 1//1*1/5*12%0x12@42", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '//'          (1, 5) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '/'           (1, 10) (1, 11)
    NUMBER     '5'           (1, 11) (1, 12)
    OP         '*'           (1, 12) (1, 13)
    NUMBER     '12'          (1, 13) (1, 15)
    OP         '%'           (1, 15) (1, 16)
    NUMBER     '0x12'        (1, 16) (1, 20)
    OP         '@'           (1, 20) (1, 21)
    NUMBER     '42'          (1, 21) (1, 23)
    """)

    def test_unary(self):
        # Unary
        self.check_tokenize("~1 ^ 1 & 1 |1 ^ -1", """\
    OP         '~'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '^'           (1, 3) (1, 4)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '&'           (1, 7) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '|'           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '^'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    NUMBER     '1'           (1, 17) (1, 18)
    """)
        self.check_tokenize("-1*1/1+1*1//1 - ---1**1", """\
    OP         '-'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '*'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '/'           (1, 4) (1, 5)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '+'           (1, 6) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '//'          (1, 10) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    OP         '-'           (1, 17) (1, 18)
    OP         '-'           (1, 18) (1, 19)
    NUMBER     '1'           (1, 19) (1, 20)
    OP         '**'          (1, 20) (1, 22)
    NUMBER     '1'           (1, 22) (1, 23)
    """)

    def test_selector(self):
        # Selector
        self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\
    NAME       'import'      (1, 0) (1, 6)
    NAME       'sys'         (1, 7) (1, 10)
    OP         ','           (1, 10) (1, 11)
    NAME       'time'        (1, 12) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'x'           (2, 0) (2, 1)
    OP         '='           (2, 2) (2, 3)
    NAME       'sys'         (2, 4) (2, 7)
    OP         '.'           (2, 7) (2, 8)
    NAME       'modules'     (2, 8) (2, 15)
    OP         '['           (2, 15) (2, 16)
    STRING     "'time'"      (2, 16) (2, 22)
    OP         ']'           (2, 22) (2, 23)
    OP         '.'           (2, 23) (2, 24)
    NAME       'time'        (2, 24) (2, 28)
    OP         '('           (2, 28) (2, 29)
    OP         ')'           (2, 29) (2, 30)
    """)

    def test_method(self):
        # Methods
        self.check_tokenize("@staticmethod\ndef foo(x,y): pass", """\
    OP         '@'           (1, 0) (1, 1)
    NAME       'staticmethod (1, 1) (1, 13)
    NEWLINE    '\\n'          (1, 13) (1, 14)
    NAME       'def'         (2, 0) (2, 3)
    NAME       'foo'         (2, 4) (2, 7)
    OP         '('           (2, 7) (2, 8)
    NAME       'x'           (2, 8) (2, 9)
    OP         ','           (2, 9) (2, 10)
    NAME       'y'           (2, 10) (2, 11)
    OP         ')'           (2, 11) (2, 12)
    OP         ':'           (2, 12) (2, 13)
    NAME       'pass'        (2, 14) (2, 18)
    """)

    def test_tabs(self):
        # Evil tabs
        self.check_tokenize("def f():\n"
                            "\tif x\n"
                            "        \tpass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'f'           (1, 4) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    OP         ':'           (1, 7) (1, 8)
    NEWLINE    '\\n'          (1, 8) (1, 9)
    INDENT     '\\t'          (2, 0) (2, 1)
    NAME       'if'          (2, 1) (2, 3)
    NAME       'x'           (2, 4) (2, 5)
    NEWLINE    '\\n'          (2, 5) (2, 6)
    INDENT     '        \\t'  (3, 0) (3, 9)
    NAME       'pass'        (3, 9) (3, 13)
    DEDENT     ''            (4, 0) (4, 0)
    DEDENT     ''            (4, 0) (4, 0)
    """)

    def test_non_ascii_identifiers(self):
        # Non-ascii identifiers
        self.check_tokenize("Örter = 'places'\ngrün = 'green'", """\
    NAME       'Örter'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    STRING     "'places'"    (1, 8) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'grün'        (2, 0) (2, 4)
    OP         '='           (2, 5) (2, 6)
    STRING     "'green'"     (2, 7) (2, 14)
    """)

    def test_unicode(self):
        # Legacy unicode literals:
        self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\
    NAME       'Örter'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    STRING     "u'places'"   (1, 8) (1, 17)
    NEWLINE    '\\n'          (1, 17) (1, 18)
    NAME       'grün'        (2, 0) (2, 4)
    OP         '='           (2, 5) (2, 6)
    STRING     "U'green'"    (2, 7) (2, 15)
    """)

    def test_async(self):
        # Async/await extension:
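        # In this Python version async/await are not reserved words: they
        # tokenize as plain NAME everywhere except around 'async def',
        # where the dedicated ASYNC and AWAIT token types are produced.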
        self.check_tokenize("async = 1", """\
    NAME       'async'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    NUMBER     '1'           (1, 8) (1, 9)
    """)

        self.check_tokenize("a = (async = 1)", """\
    NAME       'a'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    OP         '('           (1, 4) (1, 5)
    NAME       'async'       (1, 5) (1, 10)
    OP         '='           (1, 11) (1, 12)
    NUMBER     '1'           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    """)

        self.check_tokenize("async()", """\
    NAME       'async'       (1, 0) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    """)

        self.check_tokenize("class async(Bar):pass", """\
    NAME       'class'       (1, 0) (1, 5)
    NAME       'async'       (1, 6) (1, 11)
    OP         '('           (1, 11) (1, 12)
    NAME       'Bar'         (1, 12) (1, 15)
    OP         ')'           (1, 15) (1, 16)
    OP         ':'           (1, 16) (1, 17)
    NAME       'pass'        (1, 17) (1, 21)
    """)

        self.check_tokenize("class async:pass", """\
    NAME       'class'       (1, 0) (1, 5)
    NAME       'async'       (1, 6) (1, 11)
    OP         ':'           (1, 11) (1, 12)
    NAME       'pass'        (1, 12) (1, 16)
    """)

        self.check_tokenize("await = 1", """\
    NAME       'await'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    NUMBER     '1'           (1, 8) (1, 9)
    """)

        self.check_tokenize("foo.async", """\
    NAME       'foo'         (1, 0) (1, 3)
    OP         '.'           (1, 3) (1, 4)
    NAME       'async'       (1, 4) (1, 9)
    """)

        self.check_tokenize("async for a in b: pass", """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'for'         (1, 6) (1, 9)
    NAME       'a'           (1, 10) (1, 11)
    NAME       'in'          (1, 12) (1, 14)
    NAME       'b'           (1, 15) (1, 16)
    OP         ':'           (1, 16) (1, 17)
    NAME       'pass'        (1, 18) (1, 22)
    """)

        self.check_tokenize("async with a as b: pass", """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'with'        (1, 6) (1, 10)
    NAME       'a'           (1, 11) (1, 12)
    NAME       'as'          (1, 13) (1, 15)
    NAME       'b'           (1, 16) (1, 17)
    OP         ':'           (1, 17) (1, 18)
    NAME       'pass'        (1, 19) (1, 23)
    """)

        self.check_tokenize("async.foo", """\
    NAME       'async'       (1, 0) (1, 5)
    OP         '.'           (1, 5) (1, 6)
    NAME       'foo'         (1, 6) (1, 9)
    """)

        self.check_tokenize("async", """\
    NAME       'async'       (1, 0) (1, 5)
    """)

        self.check_tokenize("async\n#comment\nawait", """\
    NAME       'async'       (1, 0) (1, 5)
    NEWLINE    '\\n'          (1, 5) (1, 6)
    COMMENT    '#comment'    (2, 0) (2, 8)
    NL         '\\n'          (2, 8) (2, 9)
    NAME       'await'       (3, 0) (3, 5)
    """)

        self.check_tokenize("async\n...\nawait", """\
    NAME       'async'       (1, 0) (1, 5)
    NEWLINE    '\\n'          (1, 5) (1, 6)
    OP         '...'         (2, 0) (2, 3)
    NEWLINE    '\\n'          (2, 3) (2, 4)
    NAME       'await'       (3, 0) (3, 5)
    """)

        self.check_tokenize("async\nawait", """\
    NAME       'async'       (1, 0) (1, 5)
    NEWLINE    '\\n'          (1, 5) (1, 6)
    NAME       'await'       (2, 0) (2, 5)
    """)

        self.check_tokenize("foo.async + 1", """\
    NAME       'foo'         (1, 0) (1, 3)
    OP         '.'           (1, 3) (1, 4)
    NAME       'async'       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    NUMBER     '1'           (1, 12) (1, 13)
    """)

        self.check_tokenize("async def foo(): pass", """\
    ASYNC      'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    OP         ':'           (1, 15) (1, 16)
    NAME       'pass'        (1, 17) (1, 21)
    """)

        self.check_tokenize('''\
async def foo():
  def foo(await):
    await = 1
  if 1:
    await
async += 1
''', """\
    ASYNC      'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    OP         ':'           (1, 15) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    INDENT     '  '          (2, 0) (2, 2)
    NAME       'def'         (2, 2) (2, 5)
    NAME       'foo'         (2, 6) (2, 9)
    OP         '('           (2, 9) (2, 10)
    AWAIT      'await'       (2, 10) (2, 15)
    OP         ')'           (2, 15) (2, 16)
    OP         ':'           (2, 16) (2, 17)
    NEWLINE    '\\n'          (2, 17) (2, 18)
    INDENT     '    '        (3, 0) (3, 4)
    AWAIT      'await'       (3, 4) (3, 9)
    OP         '='           (3, 10) (3, 11)
    NUMBER     '1'           (3, 12) (3, 13)
    NEWLINE    '\\n'          (3, 13) (3, 14)
    DEDENT     ''            (4, 2) (4, 2)
    NAME       'if'          (4, 2) (4, 4)
    NUMBER     '1'           (4, 5) (4, 6)
    OP         ':'           (4, 6) (4, 7)
    NEWLINE    '\\n'          (4, 7) (4, 8)
    INDENT     '    '        (5, 0) (5, 4)
    AWAIT      'await'       (5, 4) (5, 9)
    NEWLINE    '\\n'          (5, 9) (5, 10)
    DEDENT     ''            (6, 0) (6, 0)
    DEDENT     ''            (6, 0) (6, 0)
    NAME       'async'       (6, 0) (6, 5)
    OP         '+='          (6, 6) (6, 8)
    NUMBER     '1'           (6, 9) (6, 10)
    NEWLINE    '\\n'          (6, 10) (6, 11)
    """)

        self.check_tokenize('''\
async def foo():
  async for i in 1: pass''', """\
    ASYNC      'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    OP         ':'           (1, 15) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    INDENT     '  '          (2, 0) (2, 2)
    ASYNC      'async'       (2, 2) (2, 7)
    NAME       'for'         (2, 8) (2, 11)
    NAME       'i'           (2, 12) (2, 13)
    NAME       'in'          (2, 14) (2, 16)
    NUMBER     '1'           (2, 17) (2, 18)
    OP         ':'           (2, 18) (2, 19)
    NAME       'pass'        (2, 20) (2, 24)
    DEDENT     ''            (3, 0) (3, 0)
    """)

        self.check_tokenize('''async def foo(async): await''', """\
    ASYNC      'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    ASYNC      'async'       (1, 14) (1, 19)
    OP         ')'           (1, 19) (1, 20)
    OP         ':'           (1, 20) (1, 21)
    AWAIT      'await'       (1, 22) (1, 27)
    """)

        self.check_tokenize('''\
def f():

  def baz(): pass
  async def bar(): pass

  await = 2''', """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'f'           (1, 4) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    OP         ':'           (1, 7) (1, 8)
    NEWLINE    '\\n'          (1, 8) (1, 9)
    NL         '\\n'          (2, 0) (2, 1)
    INDENT     '  '          (3, 0) (3, 2)
    NAME       'def'         (3, 2) (3, 5)
    NAME       'baz'         (3, 6) (3, 9)
    OP         '('           (3, 9) (3, 10)
    OP         ')'           (3, 10) (3, 11)
    OP         ':'           (3, 11) (3, 12)
    NAME       'pass'        (3, 13) (3, 17)
    NEWLINE    '\\n'          (3, 17) (3, 18)
    ASYNC      'async'       (4, 2) (4, 7)
    NAME       'def'         (4, 8) (4, 11)
    NAME       'bar'         (4, 12) (4, 15)
    OP         '('           (4, 15) (4, 16)
    OP         ')'           (4, 16) (4, 17)
    OP         ':'           (4, 17) (4, 18)
    NAME       'pass'        (4, 19) (4, 23)
    NEWLINE    '\\n'          (4, 23) (4, 24)
    NL         '\\n'          (5, 0) (5, 1)
    NAME       'await'       (6, 2) (6, 7)
    OP         '='           (6, 8) (6, 9)
    NUMBER     '2'           (6, 10) (6, 11)
    DEDENT     ''            (7, 0) (7, 0)
    """)

        self.check_tokenize('''\
async def f():

  def baz(): pass
  async def bar(): pass

  await = 2''', """\
    ASYNC      'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'f'           (1, 10) (1, 11)
    OP         '('           (1, 11) (1, 12)
    OP         ')'           (1, 12) (1, 13)
    OP         ':'           (1, 13) (1, 14)
    NEWLINE    '\\n'          (1, 14) (1, 15)
    NL         '\\n'          (2, 0) (2, 1)
    INDENT     '  '          (3, 0) (3, 2)
    NAME       'def'         (3, 2) (3, 5)
    NAME       'baz'         (3, 6) (3, 9)
    OP         '('           (3, 9) (3, 10)
    OP         ')'           (3, 10) (3, 11)
    OP         ':'           (3, 11) (3, 12)
    NAME       'pass'        (3, 13) (3, 17)
    NEWLINE    '\\n'          (3, 17) (3, 18)
    ASYNC      'async'       (4, 2) (4, 7)
    NAME       'def'         (4, 8) (4, 11)
    NAME       'bar'         (4, 12) (4, 15)
    OP         '('           (4, 15) (4, 16)
    OP         ')'           (4, 16) (4, 17)
    OP         ':'           (4, 17) (4, 18)
    NAME       'pass'        (4, 19) (4, 23)
    NEWLINE    '\\n'          (4, 23) (4, 24)
    NL         '\\n'          (5, 0) (5, 1)
    AWAIT      'await'       (6, 2) (6, 7)
    OP         '='           (6, 8) (6, 9)
    NUMBER     '2'           (6, 10) (6, 11)
    DEDENT     ''            (7, 0) (7, 0)
    """)


def decistmt(s):
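    """Substitute Decimal('...') constructor calls for float literals.

    A sketch of the example from the tokenize documentation: each NUMBER
    token whose text contains a '.' is replaced by the token sequence
    Decimal ( '<literal>' ), and the result is untokenized back into
    source text.
    """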
    result = []
    g = tokenize(BytesIO(s.encode('utf-8')).readline)  # tokenize the string
    for toknum, tokval, _, _, _ in g:
        if toknum == NUMBER and '.' in tokval:  # replace NUMBER tokens
            result.extend([
                (NAME, 'Decimal'),
                (OP, '('),
                (STRING, repr(tokval)),
                (OP, ')')
            ])
        else:
            result.append((toknum, tokval))
    return untokenize(result).decode('utf-8')

class TestMisc(TestCase):

    def test_decistmt(self):
        # Substitute Decimals for floats in a string of statements.
        # This is an example from the docs.

        from decimal import Decimal
        s = '+21.3e-5*-.1234/81.7'
        self.assertEqual(decistmt(s),
                         "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')")

        # The format of the exponent is inherited from the platform C library.
        # Known cases are "e-007" (Windows) and "e-07" (not Windows).  Since
        # we're only showing 11 digits, and the 12th isn't close to 5, the
        # rest of the output should be platform-independent.
        self.assertRegex(repr(eval(s)), '-3.2171603427[0-9]*e-0+7')

        # Output from calculations with Decimal should be identical across all
        # platforms.
        self.assertEqual(eval(decistmt(s)),
                         Decimal('-3.217160342717258261933904529E-7'))


class TestTokenizerAdheresToPep0263(TestCase):
    """
    Test that tokenizer adheres to the coding behaviour stipulated in PEP 0263.
    """

    def _testFile(self, filename):
        path = os.path.join(os.path.dirname(__file__), filename)
        TestRoundtrip.check_roundtrip(self, open(path, 'rb'))

    def test_utf8_coding_cookie_and_no_utf8_bom(self):
        f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt'
        self._testFile(f)

    def test_latin1_coding_cookie_and_utf8_bom(self):
        """
        As per PEP 0263, if a file starts with a utf-8 BOM signature, the only
        allowed encoding for the comment is 'utf-8'.  The text file used in
        this test starts with a BOM signature, but specifies latin1 as the
        coding, so verify that a SyntaxError is raised, which matches the
        behaviour of the interpreter when it encounters a similar condition.
        """
        f = 'tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt'
        self.assertRaises(SyntaxError, self._testFile, f)

    def test_no_coding_cookie_and_utf8_bom(self):
        f = 'tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt'
        self._testFile(f)

    def test_utf8_coding_cookie_and_utf8_bom(self):
        f = 'tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt'
        self._testFile(f)

    def test_bad_coding_cookie(self):
        self.assertRaises(SyntaxError, self._testFile, 'bad_coding.py')
        self.assertRaises(SyntaxError, self._testFile, 'bad_coding2.py')


class Test_Tokenize(TestCase):

    def test__tokenize_decodes_with_specified_encoding(self):
        literal = '"ЉЊЈЁЂ"'
        line = literal.encode('utf-8')
        first = False
        def readline():
            nonlocal first
            if not first:
                first = True
                return line
            else:
                return b''

        # skip the initial encoding token and the end token
        tokens = list(_tokenize(readline, encoding='utf-8'))[1:-1]
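        # Each token is a 5-tuple (type, string, start, end, line);
        # type 3 is token.STRING.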
        expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
        self.assertEqual(tokens, expected_tokens,
                         "bytes not decoded with encoding")

    def test__tokenize_does_not_decode_with_encoding_none(self):
        literal = '"ЉЊЈЁЂ"'
        first = False
        def readline():
            nonlocal first
            if not first:
                first = True
                return literal
            else:
                return b''

        # skip the end token
        tokens = list(_tokenize(readline, encoding=None))[:-1]
        expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
        self.assertEqual(tokens, expected_tokens,
                         "string not tokenized when encoding is None")


class TestDetectEncoding(TestCase):

    def get_readline(self, lines):
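        # Return a readline() callable that serves the given byte lines
        # one at a time, the way detect_encoding() would read them from
        # a file.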
        index = 0
        def readline():
            nonlocal index
            if index == len(lines):
                raise StopIteration
            line = lines[index]
            index += 1
            return line
        return readline

    def test_no_bom_no_encoding_cookie(self):
        lines = (
            b'# something\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, list(lines[:2]))

    def test_bom_no_cookie(self):
        lines = (
            b'\xef\xbb\xbf# something\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines,
                         [b'# something\n', b'print(something)\n'])

    def test_cookie_first_line_no_bom(self):
        lines = (
            b'# -*- coding: latin-1 -*-\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso-8859-1')
        self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n'])

    def test_matched_bom_and_cookie_first_line(self):
        lines = (
            b'\xef\xbb\xbf# coding=utf-8\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [b'# coding=utf-8\n'])

    def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self):
        lines = (
            b'\xef\xbb\xbf# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_cookie_second_line_no_bom(self):
        lines = (
            b'#! something\n',
            b'# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'ascii')
        expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n']
        self.assertEqual(consumed_lines, expected)

    def test_matched_bom_and_cookie_second_line(self):
        lines = (
            b'\xef\xbb\xbf#! something\n',
            b'f# coding=utf-8\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines,
                         [b'#! something\n', b'f# coding=utf-8\n'])

    def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self):
        lines = (
            b'\xef\xbb\xbf#! something\n',
            b'# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_cookie_second_line_noncommented_first_line(self):
        lines = (
            b"print('\xc2\xa3')\n",
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8')
        expected = [b"print('\xc2\xa3')\n"]
        self.assertEqual(consumed_lines, expected)

    def test_cookie_second_line_commented_first_line(self):
        lines = (
            b"#print('\xc2\xa3')\n",
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso8859-15')
        expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n']
        self.assertEqual(consumed_lines, expected)

    def test_cookie_second_line_empty_first_line(self):
        lines = (
            b'\n',
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso8859-15')
        expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']
        self.assertEqual(consumed_lines, expected)

    def test_latin1_normalization(self):
        # See get_normal_name() in tokenizer.c.
        encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",
                     "iso-8859-1-unix", "iso-latin-1-mac")
        for encoding in encodings:
            for rep in ("-", "_"):
                enc = encoding.replace("-", rep)
                lines = (b"#!/usr/bin/python\n",
                         b"# coding: " + enc.encode("ascii") + b"\n",
                         b"print(things)\n",
                         b"do_something += 4\n")
                rl = self.get_readline(lines)
                found, consumed_lines = detect_encoding(rl)
                self.assertEqual(found, "iso-8859-1")

    def test_syntaxerror_latin1(self):
        # Issue 14629: need to raise SyntaxError if the first
        # line(s) have non-UTF-8 characters
        lines = (
            b'print("\xdf")',  # Latin-1: LATIN SMALL LETTER SHARP S
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)


    def test_utf8_normalization(self):
        # See get_normal_name() in tokenizer.c.
        encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
        for encoding in encodings:
            for rep in ("-", "_"):
                enc = encoding.replace("-", rep)
                lines = (b"#!/usr/bin/python\n",
                         b"# coding: " + enc.encode("ascii") + b"\n",
                         b"1 + 3\n")
                rl = self.get_readline(lines)
                found, consumed_lines = detect_encoding(rl)
                self.assertEqual(found, "utf-8")

    def test_short_files(self):
        readline = self.get_readline((b'print(something)\n',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [b'print(something)\n'])

        encoding, consumed_lines = detect_encoding(self.get_readline(()))
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [])

        readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [b'print(something)\n'])

        readline = self.get_readline((b'\xef\xbb\xbf',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [])

        readline = self.get_readline((b'# coding: bad\n',))
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_false_encoding(self):
        # Issue 18873: "Encoding" detected in non-comment lines
        readline = self.get_readline((b'print("#coding=fake")',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [b'print("#coding=fake")'])

    def test_open(self):
        filename = support.TESTFN + '.py'
        self.addCleanup(support.unlink, filename)

        # test coding cookie
        for encoding in ('iso-8859-15', 'utf-8'):
            with open(filename, 'w', encoding=encoding) as fp:
                print("# coding: %s" % encoding, file=fp)
                print("print('euro:\u20ac')", file=fp)
            with tokenize_open(filename) as fp:
                self.assertEqual(fp.encoding, encoding)
                self.assertEqual(fp.mode, 'r')

        # test BOM (no coding cookie)
        with open(filename, 'w', encoding='utf-8-sig') as fp:
            print("print('euro:\u20ac')", file=fp)
        with tokenize_open(filename) as fp:
            self.assertEqual(fp.encoding, 'utf-8-sig')
            self.assertEqual(fp.mode, 'r')

    def test_filename_in_exception(self):
        # When possible, include the file name in the exception.
        path = 'some_file_path'
        lines = (
            b'print("\xdf")',  # Latin-1: LATIN SMALL LETTER SHARP S
        )
        class Bunk:
            def __init__(self, lines, path):
                self.name = path
                self._lines = lines
                self._index = 0

            def readline(self):
                if self._index == len(lines):
                    raise StopIteration
                line = lines[self._index]
                self._index += 1
                return line

        with self.assertRaises(SyntaxError):
            ins = Bunk(lines, path)
            # Make sure lacking a name isn't an issue.
            del ins.name
            detect_encoding(ins.readline)
        with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):
            ins = Bunk(lines, path)
            detect_encoding(ins.readline)

    def test_open_error(self):
        # Issue #23840: open() must close the binary file on error
        m = BytesIO(b'#coding:xxx')
        with mock.patch('tokenize._builtin_open', return_value=m):
            self.assertRaises(SyntaxError, tokenize_open, 'foobar')
        self.assertTrue(m.closed)


class TestTokenize(TestCase):

    def test_tokenize(self):
        import tokenize as tokenize_module
        encoding = object()
        encoding_used = None
        def mock_detect_encoding(readline):
            return encoding, [b'first', b'second']

        def mock__tokenize(readline, encoding):
            nonlocal encoding_used
            encoding_used = encoding
            out = []
            while True:
                next_line = readline()
                if next_line:
                    out.append(next_line)
                    continue
                return out

        counter = 0
        def mock_readline():
            nonlocal counter
            counter += 1
            if counter == 5:
                return b''
            return str(counter).encode()

        orig_detect_encoding = tokenize_module.detect_encoding
        orig__tokenize = tokenize_module._tokenize
        tokenize_module.detect_encoding = mock_detect_encoding
        tokenize_module._tokenize = mock__tokenize
        try:
            results = tokenize(mock_readline)
            self.assertEqual(list(results),
                             [b'first', b'second', b'1', b'2', b'3', b'4'])
        finally:
            tokenize_module.detect_encoding = orig_detect_encoding
            tokenize_module._tokenize = orig__tokenize

        self.assertTrue(encoding_used, encoding)

    def test_oneline_defs(self):
        buf = []
        for i in range(500):
            buf.append('def i{i}(): return {i}'.format(i=i))
        buf.append('OK')
        buf = '\n'.join(buf)

        # Test that 500 consecutive, one-line defs are OK
        toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline))
        self.assertEqual(toks[-2].string, 'OK')  # [-1] is always ENDMARKER

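    # tokenize() labels every operator with the generic OP type; the
    # exact_type attribute of each TokenInfo carries the specific operator
    # token (token.PLUS, token.LPAR, ...), which the helper below checks.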
    def assertExactTypeEqual(self, opstr, *optypes):
        tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))
        num_optypes = len(optypes)
        self.assertEqual(len(tokens), 2 + num_optypes)
        self.assertEqual(token.tok_name[tokens[0].exact_type],
                         token.tok_name[ENCODING])
        for i in range(num_optypes):
            self.assertEqual(token.tok_name[tokens[i + 1].exact_type],
                             token.tok_name[optypes[i]])
        self.assertEqual(token.tok_name[tokens[1 + num_optypes].exact_type],
                         token.tok_name[token.ENDMARKER])

    def test_exact_type(self):
        self.assertExactTypeEqual('()', token.LPAR, token.RPAR)
        self.assertExactTypeEqual('[]', token.LSQB, token.RSQB)
        self.assertExactTypeEqual(':', token.COLON)
        self.assertExactTypeEqual(',', token.COMMA)
        self.assertExactTypeEqual(';', token.SEMI)
        self.assertExactTypeEqual('+', token.PLUS)
        self.assertExactTypeEqual('-', token.MINUS)
        self.assertExactTypeEqual('*', token.STAR)
        self.assertExactTypeEqual('/', token.SLASH)
        self.assertExactTypeEqual('|', token.VBAR)
        self.assertExactTypeEqual('&', token.AMPER)
        self.assertExactTypeEqual('<', token.LESS)
        self.assertExactTypeEqual('>', token.GREATER)
        self.assertExactTypeEqual('=', token.EQUAL)
        self.assertExactTypeEqual('.', token.DOT)
        self.assertExactTypeEqual('%', token.PERCENT)
        self.assertExactTypeEqual('{}', token.LBRACE, token.RBRACE)
        self.assertExactTypeEqual('==', token.EQEQUAL)
        self.assertExactTypeEqual('!=', token.NOTEQUAL)
        self.assertExactTypeEqual('<=', token.LESSEQUAL)
        self.assertExactTypeEqual('>=', token.GREATEREQUAL)
        self.assertExactTypeEqual('~', token.TILDE)
        self.assertExactTypeEqual('^', token.CIRCUMFLEX)
        self.assertExactTypeEqual('<<', token.LEFTSHIFT)
        self.assertExactTypeEqual('>>', token.RIGHTSHIFT)
        self.assertExactTypeEqual('**', token.DOUBLESTAR)
        self.assertExactTypeEqual('+=', token.PLUSEQUAL)
        self.assertExactTypeEqual('-=', token.MINEQUAL)
        self.assertExactTypeEqual('*=', token.STAREQUAL)
        self.assertExactTypeEqual('/=', token.SLASHEQUAL)
        self.assertExactTypeEqual('%=', token.PERCENTEQUAL)
        self.assertExactTypeEqual('&=', token.AMPEREQUAL)
        self.assertExactTypeEqual('|=', token.VBAREQUAL)
        self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)
        self.assertExactTypeEqual('<<=', token.LEFTSHIFTEQUAL)
        self.assertExactTypeEqual('>>=', token.RIGHTSHIFTEQUAL)
        self.assertExactTypeEqual('**=', token.DOUBLESTAREQUAL)
        self.assertExactTypeEqual('//', token.DOUBLESLASH)
        self.assertExactTypeEqual('//=', token.DOUBLESLASHEQUAL)
        self.assertExactTypeEqual('@', token.AT)
        self.assertExactTypeEqual('@=', token.ATEQUAL)

        self.assertExactTypeEqual('a**2+b**2==c**2',
                                  NAME, token.DOUBLESTAR, NUMBER,
                                  token.PLUS,
                                  NAME, token.DOUBLESTAR, NUMBER,
                                  token.EQEQUAL,
                                  NAME, token.DOUBLESTAR, NUMBER)
        self.assertExactTypeEqual('{1, 2, 3}',
                                  token.LBRACE,
                                  token.NUMBER, token.COMMA,
                                  token.NUMBER, token.COMMA,
                                  token.NUMBER,
                                  token.RBRACE)
        self.assertExactTypeEqual('^(x & 0x1)',
                                  token.CIRCUMFLEX,
                                  token.LPAR,
                                  token.NAME, token.AMPER, token.NUMBER,
                                  token.RPAR)

    def test_pathological_trailing_whitespace(self):
        # See http://bugs.python.org/issue16152
        self.assertExactTypeEqual('@ ', token.AT)


Terry Jan Reedy5e6db312014-02-17 16:45:48 -05001340class UntokenizeTest(TestCase):
Terry Jan Reedy58edfd92014-02-17 16:49:06 -05001341
Terry Jan Reedy5e6db312014-02-17 16:45:48 -05001342 def test_bad_input_order(self):
Terry Jan Reedy9dc3a362014-02-23 23:33:08 -05001343 # raise if previous row
        u = Untokenizer()
        u.prev_row = 2
        u.prev_col = 2
        with self.assertRaises(ValueError) as cm:
            u.add_whitespace((1,3))
        self.assertEqual(cm.exception.args[0],
                         'start (1,3) precedes previous end (2,2)')
        # raise if the start column precedes the previous end column
        # within the same row
        self.assertRaises(ValueError, u.add_whitespace, (2,1))

    def test_backslash_continuation(self):
        # The problem is that <whitespace>\<newline> leaves no token
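        # behind, so add_whitespace() compensates by emitting one '\\\n' per
        # skipped row, keeping the regenerated source consistent with the
        # recorded token positions.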
        u = Untokenizer()
        u.prev_row = 1
        u.prev_col = 1
        u.tokens = []
        u.add_whitespace((2, 0))
        self.assertEqual(u.tokens, ['\\\n'])
        u.prev_row = 2
        u.add_whitespace((4, 4))
        self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', '    '])
        TestRoundtrip.check_roundtrip(self, 'a\n  b\n    c\n    \\\n  c\n')

    def test_iter_compat(self):
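        # compat mode regenerates source from the token strings alone: a NAME
        # token gets a trailing space appended, and when an ENCODING token is
        # present the module-level untokenize() encodes its result to bytes.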
        u = Untokenizer()
        token = (NAME, 'Hello')
        tokens = [(ENCODING, 'utf-8'), token]
        u.compat(token, iter([]))
        self.assertEqual(u.tokens, ["Hello "])
        u = Untokenizer()
        self.assertEqual(u.untokenize(iter([token])), 'Hello ')
        u = Untokenizer()
        self.assertEqual(u.untokenize(iter(tokens)), 'Hello ')
        self.assertEqual(u.encoding, 'utf-8')
        self.assertEqual(untokenize(iter(tokens)), b'Hello ')


class TestRoundtrip(TestCase):

    def check_roundtrip(self, f):
        """
        Test roundtrip for `untokenize`. `f` is an open file or a string.
        The source code in f is tokenized to both 5- and 2-tuples.
        Both sequences are converted back to source code via
        tokenize.untokenize(), and both results are tokenized again to
        2-tuples. The test fails if the three 2-tuple sequences do not
        all match.

        When untokenize bugs are fixed, untokenize with 5-tuples should
        reproduce code that does not contain a backslash continuation
        following spaces; a proper test should then check for that.
        """
        # Get source code and original tokenizations
        if isinstance(f, str):
            code = f.encode('utf-8')
        else:
            code = f.read()
            f.close()
        readline = iter(code.splitlines(keepends=True)).__next__
        tokens5 = list(tokenize(readline))
        tokens2 = [tok[:2] for tok in tokens5]
        # Reproduce tokens2 from pairs
        bytes_from2 = untokenize(tokens2)
        readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__
        tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]
        self.assertEqual(tokens2_from2, tokens2)
        # Reproduce tokens2 from 5-tuples
        bytes_from5 = untokenize(tokens5)
        readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__
        tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]
        self.assertEqual(tokens2_from5, tokens2)

    def test_roundtrip(self):
        # There are some standard formatting practices that are easy to get right.

        self.check_roundtrip("if x == 1:\n"
                             "    print(x)\n")
        self.check_roundtrip("# This is a comment\n"
                             "# This also")

        # Some people use different formatting conventions, which makes
        # untokenize a little trickier. Note that this test involves trailing
        # whitespace after the colon; that whitespace must survive the roundtrip.

        self.check_roundtrip("if x == 1 : \n"
                             "    print(x)\n")
        fn = support.findfile("tokenize_tests.txt")
        with open(fn, 'rb') as f:
            self.check_roundtrip(f)
        self.check_roundtrip("if x == 1:\n"
                             "    # A comment by itself.\n"
                             "    print(x) # Comment here, too.\n"
                             "    # Another comment.\n"
                             "after_if = True\n")
        self.check_roundtrip("if (x # The comments need to go in the right place\n"
                             "    == 1):\n"
                             "    print('x==1')\n")
        self.check_roundtrip("class Test: # A comment here\n"
                             "  # A comment with weird indent\n"
                             "  after_com = 5\n"
                             "  def x(m): return m*5 # a one liner\n"
                             "  def y(m): # A whitespace after the colon\n"
                             "     return y*4 # 3-space indent\n")

        # Some error-handling code
        self.check_roundtrip("try: import somemodule\n"
                             "except ImportError: # comment\n"
                             "    print('Can not import' # comment2\n)"
                             "else: print('Loaded')\n")

    def test_continuation(self):
        # Implicit line continuation inside balanced brackets
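        # (a line break inside an open (), [] or {} is tokenized as NL rather
        # than NEWLINE, so no backslash is required and untokenize must keep
        # the break where it was)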
        self.check_roundtrip("a = (3,4, \n"
                             "5,6)\n"
                             "y = [3, 4,\n"
                             "5]\n"
                             "z = {'a': 5,\n"
                             "'b':15, 'c':True}\n"
                             "x = len(y) + 5 - a[\n"
                             "3] - a[2]\n"
                             "+ len(z) - z[\n"
                             "'b']\n")

    def test_backslash_continuation(self):
        # Backslash means line continuation, except for comments
        self.check_roundtrip("x=1+\\\n"
                             "1\n"
                             "# This is a comment\\\n"
                             "# This also\n")
        self.check_roundtrip("# Comment \\\n"
                             "x = 0")

    def test_string_concatenation(self):
        # Two string literals on the same line
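        # (presumably the hazard here: losing the separating space would merge
        # the literals into '''', which the tokenizer reads as the start of an
        # unterminated triple-quoted string)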
        self.check_roundtrip("'' ''")

    def test_random_files(self):
        # Test roundtrip on random python modules.
        # Pass the '-ucpu' option to process the full directory.

        import glob, random
        fn = support.findfile("tokenize_tests.txt")
        tempdir = os.path.dirname(fn) or os.curdir
        testfiles = glob.glob(os.path.join(tempdir, "test*.py"))

        # Tokenize is broken on test_pep3131.py because regular expressions are
        # broken on the obscure unicode identifiers in it. *sigh*
        # With roundtrip extended to test the 5-tuple mode of untokenize,
        # 7 more testfiles fail. Remove them also until the failure is diagnosed.

        testfiles.remove(os.path.join(tempdir, "test_pep3131.py"))
        for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'):
            testfiles.remove(os.path.join(tempdir, "test_%s.py" % f))

        if not support.is_resource_enabled("cpu"):
            testfiles = random.sample(testfiles, 10)

        for testfile in testfiles:
            with open(testfile, 'rb') as f:
                with self.subTest(file=testfile):
                    self.check_roundtrip(f)

    def roundtrip(self, code):
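        # Helper: push code through tokenize() and untokenize() once and
        # return the regenerated source as a str.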
        if isinstance(code, str):
            code = code.encode('utf-8')
        return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')

    def test_indentation_semantics_retained(self):
        """
        Ensure that although whitespace might be mutated in a roundtrip,
        the semantic meaning of the indentation remains consistent.
        """
        code = "if False:\n\tx=3\n\tx=3\n"
        codelines = self.roundtrip(code).split('\n')
        self.assertEqual(codelines[1], codelines[2])
        self.check_roundtrip(code)


if __name__ == "__main__":
    import unittest  # the header imports names from unittest, not the module itself
    unittest.main()