from test import support
from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
                      STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
                      open as tokenize_open, Untokenizer)
from io import BytesIO
from unittest import TestCase, mock
import os
import token


class TokenizeTest(TestCase):
    # Tests for the tokenize module.

    # The tests can be really simple. Given a small fragment of source
    # code, print out a table with tokens. The ENDMARKER is omitted for
    # brevity.

    def check_tokenize(self, s, expected):
        # Format the tokens in s in a table format.
        # The ENDMARKER is omitted.
        result = []
        f = BytesIO(s.encode('utf-8'))
        for type, token, start, end, line in tokenize(f.readline):
            if type == ENDMARKER:
                break
            type = tok_name[type]
            result.append(f"    {type:10} {token!r:13} {start} {end}")
        self.assertEqual(result,
                         ["    ENCODING   'utf-8'       (0, 0) (0, 0)"] +
                         expected.rstrip().splitlines())

    def test_basic(self):
        self.check_tokenize("1 + 1", """\
    NUMBER     '1'           (1, 0) (1, 1)
    OP         '+'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    """)
        self.check_tokenize("if False:\n"
                            "    # NL\n"
                            "    True = False # NEWLINE\n", """\
    NAME       'if'          (1, 0) (1, 2)
    NAME       'False'       (1, 3) (1, 8)
    OP         ':'           (1, 8) (1, 9)
    NEWLINE    '\\n'          (1, 9) (1, 10)
    COMMENT    '# NL'        (2, 4) (2, 8)
    NL         '\\n'          (2, 8) (2, 9)
    INDENT     '    '        (3, 0) (3, 4)
    NAME       'True'        (3, 4) (3, 8)
    OP         '='           (3, 9) (3, 10)
    NAME       'False'       (3, 11) (3, 16)
    COMMENT    '# NEWLINE'   (3, 17) (3, 26)
    NEWLINE    '\\n'          (3, 26) (3, 27)
    DEDENT     ''            (4, 0) (4, 0)
    """)
        indent_error_file = b"""\
def k(x):
    x += 2
  x += 5
"""
        readline = BytesIO(indent_error_file).readline
        with self.assertRaisesRegex(IndentationError,
                                    "unindent does not match any "
                                    "outer indentation level"):
            for tok in tokenize(readline):
                pass
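        # Note (added comment): tokenize() returns a generator, so the
        # IndentationError only surfaces once iteration reaches the badly
        # dedented line -- hence the otherwise empty loop above.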

    def test_int(self):
        # Ordinary integers and binary operators
        self.check_tokenize("0xff <= 255", """\
    NUMBER     '0xff'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    """)
        self.check_tokenize("0b10 <= 255", """\
    NUMBER     '0b10'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    """)
        self.check_tokenize("0o123 <= 0O123", """\
    NUMBER     '0o123'       (1, 0) (1, 5)
    OP         '<='          (1, 6) (1, 8)
    NUMBER     '0O123'       (1, 9) (1, 14)
    """)
        self.check_tokenize("1234567 > ~0x15", """\
    NUMBER     '1234567'     (1, 0) (1, 7)
    OP         '>'           (1, 8) (1, 9)
    OP         '~'           (1, 10) (1, 11)
    NUMBER     '0x15'        (1, 11) (1, 15)
    """)
        self.check_tokenize("2134568 != 1231515", """\
    NUMBER     '2134568'     (1, 0) (1, 7)
    OP         '!='          (1, 8) (1, 10)
    NUMBER     '1231515'     (1, 11) (1, 18)
    """)
        self.check_tokenize("(-124561-1) & 200000000", """\
    OP         '('           (1, 0) (1, 1)
    OP         '-'           (1, 1) (1, 2)
    NUMBER     '124561'      (1, 2) (1, 8)
    OP         '-'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         ')'           (1, 10) (1, 11)
    OP         '&'           (1, 12) (1, 13)
    NUMBER     '200000000'   (1, 14) (1, 23)
    """)
        self.check_tokenize("0xdeadbeef != -1", """\
    NUMBER     '0xdeadbeef'  (1, 0) (1, 10)
    OP         '!='          (1, 11) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    NUMBER     '1'           (1, 15) (1, 16)
    """)
        self.check_tokenize("0xdeadc0de & 12345", """\
    NUMBER     '0xdeadc0de'  (1, 0) (1, 10)
    OP         '&'           (1, 11) (1, 12)
    NUMBER     '12345'       (1, 13) (1, 18)
    """)
        self.check_tokenize("0xFF & 0x15 | 1234", """\
    NUMBER     '0xFF'        (1, 0) (1, 4)
    OP         '&'           (1, 5) (1, 6)
    NUMBER     '0x15'        (1, 7) (1, 11)
    OP         '|'           (1, 12) (1, 13)
    NUMBER     '1234'        (1, 14) (1, 18)
    """)

    def test_long(self):
        # Long integers
        self.check_tokenize("x = 0", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0'           (1, 4) (1, 5)
    """)
        self.check_tokenize("x = 0xfffffffffff", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0xfffffffffff' (1, 4) (1, 17)
    """)
        self.check_tokenize("x = 123141242151251616110", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '123141242151251616110' (1, 4) (1, 25)
    """)
        self.check_tokenize("x = -15921590215012591", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    OP         '-'           (1, 4) (1, 5)
    NUMBER     '15921590215012591' (1, 5) (1, 22)
    """)

    def test_float(self):
        # Floating point numbers
        self.check_tokenize("x = 3.14159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 314159.", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '314159.'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = .314159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '.314159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 3e14159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3e14159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 3E123", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3E123'       (1, 4) (1, 9)
    """)
        self.check_tokenize("x+y = 3e-1230", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '+'           (1, 1) (1, 2)
    NAME       'y'           (1, 2) (1, 3)
    OP         '='           (1, 4) (1, 5)
    NUMBER     '3e-1230'     (1, 6) (1, 13)
    """)
        self.check_tokenize("x = 3.14e159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14e159'    (1, 4) (1, 12)
    """)

    def test_string(self):
        # String literals
        self.check_tokenize("x = ''; y = \"\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "''"          (1, 4) (1, 6)
    OP         ';'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '='           (1, 10) (1, 11)
    STRING     '""'          (1, 12) (1, 14)
    """)
        self.check_tokenize("x = '\"'; y = \"'\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '\\'"\\''       (1, 4) (1, 7)
    OP         ';'           (1, 7) (1, 8)
    NAME       'y'           (1, 9) (1, 10)
    OP         '='           (1, 11) (1, 12)
    STRING     '"\\'"'        (1, 13) (1, 16)
    """)
        self.check_tokenize("x = \"doesn't \"shrink\", does it\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"doesn\\'t "'  (1, 4) (1, 14)
    NAME       'shrink'      (1, 14) (1, 20)
    STRING     '", does it"'  (1, 20) (1, 31)
    """)
        self.check_tokenize("x = 'abc' + 'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "'abc'"       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    STRING     "'ABC'"       (1, 12) (1, 17)
    """)
        self.check_tokenize('y = "ABC" + "ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"ABC"'       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    STRING     '"ABC"'       (1, 12) (1, 17)
    """)
        self.check_tokenize("x = r'abc' + r'ABC' + R'ABC' + R'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "r'abc'"      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     "r'ABC'"      (1, 13) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    STRING     "R'ABC'"      (1, 22) (1, 28)
    OP         '+'           (1, 29) (1, 30)
    STRING     "R'ABC'"      (1, 31) (1, 37)
    """)
        self.check_tokenize('y = r"abc" + r"ABC" + R"ABC" + R"ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     'r"abc"'      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     'r"ABC"'      (1, 13) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    STRING     'R"ABC"'      (1, 22) (1, 28)
    OP         '+'           (1, 29) (1, 30)
    STRING     'R"ABC"'      (1, 31) (1, 37)
    """)

        self.check_tokenize("u'abc' + U'abc'", """\
    STRING     "u'abc'"      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     "U'abc'"      (1, 9) (1, 15)
    """)
        self.check_tokenize('u"abc" + U"abc"', """\
    STRING     'u"abc"'      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     'U"abc"'      (1, 9) (1, 15)
    """)

        self.check_tokenize("b'abc' + B'abc'", """\
    STRING     "b'abc'"      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     "B'abc'"      (1, 9) (1, 15)
    """)
        self.check_tokenize('b"abc" + B"abc"', """\
    STRING     'b"abc"'      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     'B"abc"'      (1, 9) (1, 15)
    """)
        self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\
    STRING     "br'abc'"     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     "bR'abc'"     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     "Br'abc'"     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     "BR'abc'"     (1, 30) (1, 37)
    """)
        self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\
    STRING     'br"abc"'     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     'bR"abc"'     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     'Br"abc"'     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     'BR"abc"'     (1, 30) (1, 37)
    """)
        self.check_tokenize("rb'abc' + rB'abc' + Rb'abc' + RB'abc'", """\
    STRING     "rb'abc'"     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     "rB'abc'"     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     "Rb'abc'"     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     "RB'abc'"     (1, 30) (1, 37)
    """)
        self.check_tokenize('rb"abc" + rB"abc" + Rb"abc" + RB"abc"', """\
    STRING     'rb"abc"'     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     'rB"abc"'     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     'Rb"abc"'     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     'RB"abc"'     (1, 30) (1, 37)
    """)
        # Check 0, 1, and 2 character string prefixes.
        self.check_tokenize(r'"a\
de\
fg"', """\
    STRING     '"a\\\\\\nde\\\\\\nfg"\' (1, 0) (3, 3)
    """)
        self.check_tokenize(r'u"a\
de"', """\
    STRING     'u"a\\\\\\nde"\'  (1, 0) (2, 3)
    """)
        self.check_tokenize(r'rb"a\
d"', """\
    STRING     'rb"a\\\\\\nd"\'  (1, 0) (2, 2)
    """)
        self.check_tokenize(r'"""a\
b"""', """\
    STRING     '\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
    """)
        self.check_tokenize(r'u"""a\
b"""', """\
    STRING     'u\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
    """)
        self.check_tokenize(r'rb"""a\
b\
c"""', """\
    STRING     'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4)
    """)
        self.check_tokenize('f"abc"', """\
    STRING     'f"abc"'      (1, 0) (1, 6)
    """)
        self.check_tokenize('fR"a{b}c"', """\
    STRING     'fR"a{b}c"'   (1, 0) (1, 9)
    """)
        self.check_tokenize('f"""abc"""', """\
    STRING     'f\"\"\"abc\"\"\"'  (1, 0) (1, 10)
    """)
        self.check_tokenize(r'f"abc\
def"', """\
    STRING     'f"abc\\\\\\ndef"' (1, 0) (2, 4)
    """)
        self.check_tokenize(r'Rf"abc\
def"', """\
    STRING     'Rf"abc\\\\\\ndef"' (1, 0) (2, 4)
    """)

    def test_function(self):
        self.check_tokenize("def d22(a, b, c=2, d=2, *k): pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd22'         (1, 4) (1, 7)
    OP         '('           (1, 7) (1, 8)
    NAME       'a'           (1, 8) (1, 9)
    OP         ','           (1, 9) (1, 10)
    NAME       'b'           (1, 11) (1, 12)
    OP         ','           (1, 12) (1, 13)
    NAME       'c'           (1, 14) (1, 15)
    OP         '='           (1, 15) (1, 16)
    NUMBER     '2'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    NAME       'd'           (1, 19) (1, 20)
    OP         '='           (1, 20) (1, 21)
    NUMBER     '2'           (1, 21) (1, 22)
    OP         ','           (1, 22) (1, 23)
    OP         '*'           (1, 24) (1, 25)
    NAME       'k'           (1, 25) (1, 26)
    OP         ')'           (1, 26) (1, 27)
    OP         ':'           (1, 27) (1, 28)
    NAME       'pass'        (1, 29) (1, 33)
    """)
        self.check_tokenize("def d01v_(a=1, *k, **w): pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd01v_'       (1, 4) (1, 9)
    OP         '('           (1, 9) (1, 10)
    NAME       'a'           (1, 10) (1, 11)
    OP         '='           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         ','           (1, 13) (1, 14)
    OP         '*'           (1, 15) (1, 16)
    NAME       'k'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    OP         '**'          (1, 19) (1, 21)
    NAME       'w'           (1, 21) (1, 22)
    OP         ')'           (1, 22) (1, 23)
    OP         ':'           (1, 23) (1, 24)
    NAME       'pass'        (1, 25) (1, 29)
    """)

    def test_comparison(self):
        # Comparison
        self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != "
                            "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\
    NAME       'if'          (1, 0) (1, 2)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '<'           (1, 5) (1, 6)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '>'           (1, 9) (1, 10)
    NUMBER     '1'           (1, 11) (1, 12)
    OP         '=='          (1, 13) (1, 15)
    NUMBER     '1'           (1, 16) (1, 17)
    OP         '>='          (1, 18) (1, 20)
    NUMBER     '5'           (1, 21) (1, 22)
    OP         '<='          (1, 23) (1, 25)
    NUMBER     '0x15'        (1, 26) (1, 30)
    OP         '<='          (1, 31) (1, 33)
    NUMBER     '0x12'        (1, 34) (1, 38)
    OP         '!='          (1, 39) (1, 41)
    NUMBER     '1'           (1, 42) (1, 43)
    NAME       'and'         (1, 44) (1, 47)
    NUMBER     '5'           (1, 48) (1, 49)
    NAME       'in'          (1, 50) (1, 52)
    NUMBER     '1'           (1, 53) (1, 54)
    NAME       'not'         (1, 55) (1, 58)
    NAME       'in'          (1, 59) (1, 61)
    NUMBER     '1'           (1, 62) (1, 63)
    NAME       'is'          (1, 64) (1, 66)
    NUMBER     '1'           (1, 67) (1, 68)
    NAME       'or'          (1, 69) (1, 71)
    NUMBER     '5'           (1, 72) (1, 73)
    NAME       'is'          (1, 74) (1, 76)
    NAME       'not'         (1, 77) (1, 80)
    NUMBER     '1'           (1, 81) (1, 82)
    OP         ':'           (1, 82) (1, 83)
    NAME       'pass'        (1, 84) (1, 88)
    """)

    def test_shift(self):
        # Shift
        self.check_tokenize("x = 1 << 1 >> 5", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '<<'          (1, 6) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '>>'          (1, 11) (1, 13)
    NUMBER     '5'           (1, 14) (1, 15)
    """)

    def test_additive(self):
        # Additive
        self.check_tokenize("x = 1 - y + 15 - 1 + 0x124 + z + a[5]", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '-'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    NUMBER     '15'          (1, 12) (1, 14)
    OP         '-'           (1, 15) (1, 16)
    NUMBER     '1'           (1, 17) (1, 18)
    OP         '+'           (1, 19) (1, 20)
    NUMBER     '0x124'       (1, 21) (1, 26)
    OP         '+'           (1, 27) (1, 28)
    NAME       'z'           (1, 29) (1, 30)
    OP         '+'           (1, 31) (1, 32)
    NAME       'a'           (1, 33) (1, 34)
    OP         '['           (1, 34) (1, 35)
    NUMBER     '5'           (1, 35) (1, 36)
    OP         ']'           (1, 36) (1, 37)
    """)

    def test_multiplicative(self):
        # Multiplicative
        self.check_tokenize("x = 1//1*1/5*12%0x12@42", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '//'          (1, 5) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '/'           (1, 10) (1, 11)
    NUMBER     '5'           (1, 11) (1, 12)
    OP         '*'           (1, 12) (1, 13)
    NUMBER     '12'          (1, 13) (1, 15)
    OP         '%'           (1, 15) (1, 16)
    NUMBER     '0x12'        (1, 16) (1, 20)
    OP         '@'           (1, 20) (1, 21)
    NUMBER     '42'          (1, 21) (1, 23)
    """)

    def test_unary(self):
        # Unary
        self.check_tokenize("~1 ^ 1 & 1 |1 ^ -1", """\
    OP         '~'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '^'           (1, 3) (1, 4)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '&'           (1, 7) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '|'           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '^'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    NUMBER     '1'           (1, 17) (1, 18)
    """)
        self.check_tokenize("-1*1/1+1*1//1 - ---1**1", """\
    OP         '-'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '*'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '/'           (1, 4) (1, 5)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '+'           (1, 6) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '//'          (1, 10) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    OP         '-'           (1, 17) (1, 18)
    OP         '-'           (1, 18) (1, 19)
    NUMBER     '1'           (1, 19) (1, 20)
    OP         '**'          (1, 20) (1, 22)
    NUMBER     '1'           (1, 22) (1, 23)
    """)

    def test_selector(self):
        # Selector
        self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\
    NAME       'import'      (1, 0) (1, 6)
    NAME       'sys'         (1, 7) (1, 10)
    OP         ','           (1, 10) (1, 11)
    NAME       'time'        (1, 12) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'x'           (2, 0) (2, 1)
    OP         '='           (2, 2) (2, 3)
    NAME       'sys'         (2, 4) (2, 7)
    OP         '.'           (2, 7) (2, 8)
    NAME       'modules'     (2, 8) (2, 15)
    OP         '['           (2, 15) (2, 16)
    STRING     "'time'"      (2, 16) (2, 22)
    OP         ']'           (2, 22) (2, 23)
    OP         '.'           (2, 23) (2, 24)
    NAME       'time'        (2, 24) (2, 28)
    OP         '('           (2, 28) (2, 29)
    OP         ')'           (2, 29) (2, 30)
    """)

    def test_method(self):
        # Methods
        self.check_tokenize("@staticmethod\ndef foo(x,y): pass", """\
    OP         '@'           (1, 0) (1, 1)
    NAME       'staticmethod' (1, 1) (1, 13)
    NEWLINE    '\\n'          (1, 13) (1, 14)
    NAME       'def'         (2, 0) (2, 3)
    NAME       'foo'         (2, 4) (2, 7)
    OP         '('           (2, 7) (2, 8)
    NAME       'x'           (2, 8) (2, 9)
    OP         ','           (2, 9) (2, 10)
    NAME       'y'           (2, 10) (2, 11)
    OP         ')'           (2, 11) (2, 12)
    OP         ':'           (2, 12) (2, 13)
    NAME       'pass'        (2, 14) (2, 18)
    """)

    def test_tabs(self):
        # Evil tabs
        self.check_tokenize("def f():\n"
                            "\tif x\n"
                            "        \tpass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'f'           (1, 4) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    OP         ':'           (1, 7) (1, 8)
    NEWLINE    '\\n'          (1, 8) (1, 9)
    INDENT     '\\t'          (2, 0) (2, 1)
    NAME       'if'          (2, 1) (2, 3)
    NAME       'x'           (2, 4) (2, 5)
    NEWLINE    '\\n'          (2, 5) (2, 6)
    INDENT     '        \\t'  (3, 0) (3, 9)
    NAME       'pass'        (3, 9) (3, 13)
    DEDENT     ''            (4, 0) (4, 0)
    DEDENT     ''            (4, 0) (4, 0)
    """)

    def test_non_ascii_identifiers(self):
        # Non-ascii identifiers
        self.check_tokenize("Örter = 'places'\ngrün = 'green'", """\
    NAME       'Örter'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    STRING     "'places'"    (1, 8) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'grün'        (2, 0) (2, 4)
    OP         '='           (2, 5) (2, 6)
    STRING     "'green'"     (2, 7) (2, 14)
    """)

    def test_unicode(self):
        # Legacy unicode literals:
        self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\
    NAME       'Örter'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    STRING     "u'places'"   (1, 8) (1, 17)
    NEWLINE    '\\n'          (1, 17) (1, 18)
    NAME       'grün'        (2, 0) (2, 4)
    OP         '='           (2, 5) (2, 6)
    STRING     "U'green'"    (2, 7) (2, 15)
    """)

    def test_async(self):
        # Async/await extension:
        self.check_tokenize("async = 1", """\
    NAME       'async'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    NUMBER     '1'           (1, 8) (1, 9)
    """)

        self.check_tokenize("a = (async = 1)", """\
    NAME       'a'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    OP         '('           (1, 4) (1, 5)
    NAME       'async'       (1, 5) (1, 10)
    OP         '='           (1, 11) (1, 12)
    NUMBER     '1'           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    """)

        self.check_tokenize("async()", """\
    NAME       'async'       (1, 0) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    """)

        self.check_tokenize("class async(Bar):pass", """\
    NAME       'class'       (1, 0) (1, 5)
    NAME       'async'       (1, 6) (1, 11)
    OP         '('           (1, 11) (1, 12)
    NAME       'Bar'         (1, 12) (1, 15)
    OP         ')'           (1, 15) (1, 16)
    OP         ':'           (1, 16) (1, 17)
    NAME       'pass'        (1, 17) (1, 21)
    """)

        self.check_tokenize("class async:pass", """\
    NAME       'class'       (1, 0) (1, 5)
    NAME       'async'       (1, 6) (1, 11)
    OP         ':'           (1, 11) (1, 12)
    NAME       'pass'        (1, 12) (1, 16)
    """)

        self.check_tokenize("await = 1", """\
    NAME       'await'       (1, 0) (1, 5)
    OP         '='           (1, 6) (1, 7)
    NUMBER     '1'           (1, 8) (1, 9)
    """)

        self.check_tokenize("foo.async", """\
    NAME       'foo'         (1, 0) (1, 3)
    OP         '.'           (1, 3) (1, 4)
    NAME       'async'       (1, 4) (1, 9)
    """)

        self.check_tokenize("async for a in b: pass", """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'for'         (1, 6) (1, 9)
    NAME       'a'           (1, 10) (1, 11)
    NAME       'in'          (1, 12) (1, 14)
    NAME       'b'           (1, 15) (1, 16)
    OP         ':'           (1, 16) (1, 17)
    NAME       'pass'        (1, 18) (1, 22)
    """)

        self.check_tokenize("async with a as b: pass", """\
    NAME       'async'       (1, 0) (1, 5)
    NAME       'with'        (1, 6) (1, 10)
    NAME       'a'           (1, 11) (1, 12)
    NAME       'as'          (1, 13) (1, 15)
    NAME       'b'           (1, 16) (1, 17)
    OP         ':'           (1, 17) (1, 18)
    NAME       'pass'        (1, 19) (1, 23)
    """)

        self.check_tokenize("async.foo", """\
    NAME       'async'       (1, 0) (1, 5)
    OP         '.'           (1, 5) (1, 6)
    NAME       'foo'         (1, 6) (1, 9)
    """)

        self.check_tokenize("async", """\
    NAME       'async'       (1, 0) (1, 5)
    """)

        self.check_tokenize("async\n#comment\nawait", """\
    NAME       'async'       (1, 0) (1, 5)
    NEWLINE    '\\n'          (1, 5) (1, 6)
    COMMENT    '#comment'    (2, 0) (2, 8)
    NL         '\\n'          (2, 8) (2, 9)
    NAME       'await'       (3, 0) (3, 5)
    """)

        self.check_tokenize("async\n...\nawait", """\
    NAME       'async'       (1, 0) (1, 5)
    NEWLINE    '\\n'          (1, 5) (1, 6)
    OP         '...'         (2, 0) (2, 3)
    NEWLINE    '\\n'          (2, 3) (2, 4)
    NAME       'await'       (3, 0) (3, 5)
    """)

        self.check_tokenize("async\nawait", """\
    NAME       'async'       (1, 0) (1, 5)
    NEWLINE    '\\n'          (1, 5) (1, 6)
    NAME       'await'       (2, 0) (2, 5)
    """)

        self.check_tokenize("foo.async + 1", """\
    NAME       'foo'         (1, 0) (1, 3)
    OP         '.'           (1, 3) (1, 4)
    NAME       'async'       (1, 4) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    NUMBER     '1'           (1, 12) (1, 13)
    """)

        self.check_tokenize("async def foo(): pass", """\
    ASYNC      'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    OP         ':'           (1, 15) (1, 16)
    NAME       'pass'        (1, 17) (1, 21)
    """)

        self.check_tokenize('''\
async def foo():
  def foo(await):
    await = 1
  if 1:
    await
async += 1
''', """\
    ASYNC      'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    OP         ':'           (1, 15) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    INDENT     '  '          (2, 0) (2, 2)
    NAME       'def'         (2, 2) (2, 5)
    NAME       'foo'         (2, 6) (2, 9)
    OP         '('           (2, 9) (2, 10)
    AWAIT      'await'       (2, 10) (2, 15)
    OP         ')'           (2, 15) (2, 16)
    OP         ':'           (2, 16) (2, 17)
    NEWLINE    '\\n'          (2, 17) (2, 18)
    INDENT     '    '        (3, 0) (3, 4)
    AWAIT      'await'       (3, 4) (3, 9)
    OP         '='           (3, 10) (3, 11)
    NUMBER     '1'           (3, 12) (3, 13)
    NEWLINE    '\\n'          (3, 13) (3, 14)
    DEDENT     ''            (4, 2) (4, 2)
    NAME       'if'          (4, 2) (4, 4)
    NUMBER     '1'           (4, 5) (4, 6)
    OP         ':'           (4, 6) (4, 7)
    NEWLINE    '\\n'          (4, 7) (4, 8)
    INDENT     '    '        (5, 0) (5, 4)
    AWAIT      'await'       (5, 4) (5, 9)
    NEWLINE    '\\n'          (5, 9) (5, 10)
    DEDENT     ''            (6, 0) (6, 0)
    DEDENT     ''            (6, 0) (6, 0)
    NAME       'async'       (6, 0) (6, 5)
    OP         '+='          (6, 6) (6, 8)
    NUMBER     '1'           (6, 9) (6, 10)
    NEWLINE    '\\n'          (6, 10) (6, 11)
    """)

        self.check_tokenize('''\
async def foo():
  async for i in 1: pass''', """\
    ASYNC      'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    OP         ')'           (1, 14) (1, 15)
    OP         ':'           (1, 15) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    INDENT     '  '          (2, 0) (2, 2)
    ASYNC      'async'       (2, 2) (2, 7)
    NAME       'for'         (2, 8) (2, 11)
    NAME       'i'           (2, 12) (2, 13)
    NAME       'in'          (2, 14) (2, 16)
    NUMBER     '1'           (2, 17) (2, 18)
    OP         ':'           (2, 18) (2, 19)
    NAME       'pass'        (2, 20) (2, 24)
    DEDENT     ''            (3, 0) (3, 0)
    """)

        self.check_tokenize('''async def foo(async): await''', """\
    ASYNC      'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'foo'         (1, 10) (1, 13)
    OP         '('           (1, 13) (1, 14)
    ASYNC      'async'       (1, 14) (1, 19)
    OP         ')'           (1, 19) (1, 20)
    OP         ':'           (1, 20) (1, 21)
    AWAIT      'await'       (1, 22) (1, 27)
    """)

        self.check_tokenize('''\
def f():

  def baz(): pass
  async def bar(): pass

  await = 2''', """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'f'           (1, 4) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    OP         ':'           (1, 7) (1, 8)
    NEWLINE    '\\n'          (1, 8) (1, 9)
    NL         '\\n'          (2, 0) (2, 1)
    INDENT     '  '          (3, 0) (3, 2)
    NAME       'def'         (3, 2) (3, 5)
    NAME       'baz'         (3, 6) (3, 9)
    OP         '('           (3, 9) (3, 10)
    OP         ')'           (3, 10) (3, 11)
    OP         ':'           (3, 11) (3, 12)
    NAME       'pass'        (3, 13) (3, 17)
    NEWLINE    '\\n'          (3, 17) (3, 18)
    ASYNC      'async'       (4, 2) (4, 7)
    NAME       'def'         (4, 8) (4, 11)
    NAME       'bar'         (4, 12) (4, 15)
    OP         '('           (4, 15) (4, 16)
    OP         ')'           (4, 16) (4, 17)
    OP         ':'           (4, 17) (4, 18)
    NAME       'pass'        (4, 19) (4, 23)
    NEWLINE    '\\n'          (4, 23) (4, 24)
    NL         '\\n'          (5, 0) (5, 1)
    NAME       'await'       (6, 2) (6, 7)
    OP         '='           (6, 8) (6, 9)
    NUMBER     '2'           (6, 10) (6, 11)
    DEDENT     ''            (7, 0) (7, 0)
    """)

        self.check_tokenize('''\
async def f():

  def baz(): pass
  async def bar(): pass

  await = 2''', """\
    ASYNC      'async'       (1, 0) (1, 5)
    NAME       'def'         (1, 6) (1, 9)
    NAME       'f'           (1, 10) (1, 11)
    OP         '('           (1, 11) (1, 12)
    OP         ')'           (1, 12) (1, 13)
    OP         ':'           (1, 13) (1, 14)
    NEWLINE    '\\n'          (1, 14) (1, 15)
    NL         '\\n'          (2, 0) (2, 1)
    INDENT     '  '          (3, 0) (3, 2)
    NAME       'def'         (3, 2) (3, 5)
    NAME       'baz'         (3, 6) (3, 9)
    OP         '('           (3, 9) (3, 10)
    OP         ')'           (3, 10) (3, 11)
    OP         ':'           (3, 11) (3, 12)
    NAME       'pass'        (3, 13) (3, 17)
    NEWLINE    '\\n'          (3, 17) (3, 18)
    ASYNC      'async'       (4, 2) (4, 7)
    NAME       'def'         (4, 8) (4, 11)
    NAME       'bar'         (4, 12) (4, 15)
    OP         '('           (4, 15) (4, 16)
    OP         ')'           (4, 16) (4, 17)
    OP         ':'           (4, 17) (4, 18)
    NAME       'pass'        (4, 19) (4, 23)
    NEWLINE    '\\n'          (4, 23) (4, 24)
    NL         '\\n'          (5, 0) (5, 1)
    AWAIT      'await'       (6, 2) (6, 7)
    OP         '='           (6, 8) (6, 9)
    NUMBER     '2'           (6, 10) (6, 11)
    DEDENT     ''            (7, 0) (7, 0)
    """)


def decistmt(s):
    result = []
    g = tokenize(BytesIO(s.encode('utf-8')).readline)  # tokenize the string
    for toknum, tokval, _, _, _ in g:
        if toknum == NUMBER and '.' in tokval:  # replace NUMBER tokens
            result.extend([
                (NAME, 'Decimal'),
                (OP, '('),
                (STRING, repr(tokval)),
                (OP, ')')
            ])
        else:
            result.append((toknum, tokval))
    return untokenize(result).decode('utf-8')
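
# A quick illustration of decistmt (an added sketch, not part of the original
# file): float literals become Decimal(...) calls while every other token
# round-trips unchanged. Because the replacement tuples are 2-tuples,
# untokenize() runs in compat mode, which is where the extra spaces in the
# output come from:
#
#     >>> decistmt('+21.3e-5*-.1234/81.7')
#     "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')"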

class TestMisc(TestCase):

    def test_decistmt(self):
        # Substitute Decimals for floats in a string of statements.
        # This is an example from the docs.

        from decimal import Decimal
        s = '+21.3e-5*-.1234/81.7'
        self.assertEqual(decistmt(s),
                         "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')")

        # The format of the exponent is inherited from the platform C library.
        # Known cases are "e-007" (Windows) and "e-07" (not Windows). Since
        # we're only showing 11 digits, and the 12th isn't close to 5, the
        # rest of the output should be platform-independent.
        self.assertRegex(repr(eval(s)), '-3.2171603427[0-9]*e-0+7')

        # Output from calculations with Decimal should be identical across all
        # platforms.
        self.assertEqual(eval(decistmt(s)),
                         Decimal('-3.217160342717258261933904529E-7'))

class TestTokenizerAdheresToPep0263(TestCase):
    """
    Test that tokenizer adheres to the coding behaviour stipulated in PEP 0263.
    """

    def _testFile(self, filename):
        path = os.path.join(os.path.dirname(__file__), filename)
        TestRoundtrip.check_roundtrip(self, open(path, 'rb'))

    def test_utf8_coding_cookie_and_no_utf8_bom(self):
        f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt'
        self._testFile(f)

    def test_latin1_coding_cookie_and_utf8_bom(self):
        """
        As per PEP 0263, if a file starts with a utf-8 BOM signature, the only
        allowed encoding for the comment is 'utf-8'. The text file used in
        this test starts with a BOM signature, but specifies latin1 as the
        coding, so verify that a SyntaxError is raised, which matches the
        behaviour of the interpreter when it encounters a similar condition.
        """
        f = 'tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt'
        self.assertRaises(SyntaxError, self._testFile, f)
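        # The same conflict can be reproduced in memory without a data file
        # (an illustrative sketch, not part of the original suite); a UTF-8
        # BOM followed by a conflicting cookie makes detect_encoding() raise
        # SyntaxError, as TestDetectEncoding below verifies directly:
        #
        #     readline = BytesIO(b'\xef\xbb\xbf# coding: latin-1\n').readline
        #     detect_encoding(readline)  # raises SyntaxError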

    def test_no_coding_cookie_and_utf8_bom(self):
        f = 'tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt'
        self._testFile(f)

    def test_utf8_coding_cookie_and_utf8_bom(self):
        f = 'tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt'
        self._testFile(f)

    def test_bad_coding_cookie(self):
        self.assertRaises(SyntaxError, self._testFile, 'bad_coding.py')
        self.assertRaises(SyntaxError, self._testFile, 'bad_coding2.py')


class Test_Tokenize(TestCase):

    def test__tokenize_decodes_with_specified_encoding(self):
        literal = '"ЉЊЈЁЂ"'
        line = literal.encode('utf-8')
        first = False
        def readline():
            nonlocal first
            if not first:
                first = True
                return line
            else:
                return b''

        # skip the initial encoding token and the end token
        tokens = list(_tokenize(readline, encoding='utf-8'))[1:-1]
        expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
        self.assertEqual(tokens, expected_tokens,
                         "bytes not decoded with encoding")

    def test__tokenize_does_not_decode_with_encoding_none(self):
        literal = '"ЉЊЈЁЂ"'
        first = False
        def readline():
            nonlocal first
            if not first:
                first = True
                return literal
            else:
                return b''

        # skip the end token
        tokens = list(_tokenize(readline, encoding=None))[:-1]
        expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
        self.assertEqual(tokens, expected_tokens,
                         "string not tokenized when encoding is None")


class TestDetectEncoding(TestCase):

    def get_readline(self, lines):
        index = 0
        def readline():
            nonlocal index
            if index == len(lines):
                raise StopIteration
            line = lines[index]
            index += 1
            return line
        return readline

    def test_no_bom_no_encoding_cookie(self):
        lines = (
            b'# something\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, list(lines[:2]))

    def test_bom_no_cookie(self):
        lines = (
            b'\xef\xbb\xbf# something\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines,
                         [b'# something\n', b'print(something)\n'])

    def test_cookie_first_line_no_bom(self):
        lines = (
            b'# -*- coding: latin-1 -*-\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso-8859-1')
        self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n'])

    def test_matched_bom_and_cookie_first_line(self):
        lines = (
            b'\xef\xbb\xbf# coding=utf-8\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [b'# coding=utf-8\n'])

    def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self):
        lines = (
            b'\xef\xbb\xbf# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_cookie_second_line_no_bom(self):
        lines = (
            b'#! something\n',
            b'# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'ascii')
        expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n']
        self.assertEqual(consumed_lines, expected)

    def test_matched_bom_and_cookie_second_line(self):
        lines = (
            b'\xef\xbb\xbf#! something\n',
            b'f# coding=utf-8\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines,
                         [b'#! something\n', b'f# coding=utf-8\n'])

    def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self):
        lines = (
            b'\xef\xbb\xbf#! something\n',
            b'# vim: set fileencoding=ascii :\n',
            b'print(something)\n',
            b'do_something(else)\n'
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_cookie_second_line_noncommented_first_line(self):
        lines = (
            b"print('\xc2\xa3')\n",
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'utf-8')
        expected = [b"print('\xc2\xa3')\n"]
        self.assertEqual(consumed_lines, expected)

    def test_cookie_second_line_commented_first_line(self):
        lines = (
            b"#print('\xc2\xa3')\n",
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso8859-15')
        expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n']
        self.assertEqual(consumed_lines, expected)

    def test_cookie_second_line_empty_first_line(self):
        lines = (
            b'\n',
            b'# vim: set fileencoding=iso8859-15 :\n',
            b"print('\xe2\x82\xac')\n"
        )
        encoding, consumed_lines = detect_encoding(self.get_readline(lines))
        self.assertEqual(encoding, 'iso8859-15')
        expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']
        self.assertEqual(consumed_lines, expected)

    def test_latin1_normalization(self):
        # See get_normal_name() in tokenizer.c.
        encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",
                     "iso-8859-1-unix", "iso-latin-1-mac")
        for encoding in encodings:
            for rep in ("-", "_"):
                enc = encoding.replace("-", rep)
                lines = (b"#!/usr/bin/python\n",
                         b"# coding: " + enc.encode("ascii") + b"\n",
                         b"print(things)\n",
                         b"do_something += 4\n")
                rl = self.get_readline(lines)
                found, consumed_lines = detect_encoding(rl)
                self.assertEqual(found, "iso-8859-1")

    def test_syntaxerror_latin1(self):
        # Issue 14629: need to raise SyntaxError if the first
        # line(s) have non-UTF-8 characters
        lines = (
            b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
        )
        readline = self.get_readline(lines)
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_utf8_normalization(self):
        # See get_normal_name() in tokenizer.c.
        encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
        for encoding in encodings:
            for rep in ("-", "_"):
                enc = encoding.replace("-", rep)
                lines = (b"#!/usr/bin/python\n",
                         b"# coding: " + enc.encode("ascii") + b"\n",
                         b"1 + 3\n")
                rl = self.get_readline(lines)
                found, consumed_lines = detect_encoding(rl)
                self.assertEqual(found, "utf-8")

    def test_short_files(self):
        readline = self.get_readline((b'print(something)\n',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [b'print(something)\n'])

        encoding, consumed_lines = detect_encoding(self.get_readline(()))
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [])

        readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [b'print(something)\n'])

        readline = self.get_readline((b'\xef\xbb\xbf',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8-sig')
        self.assertEqual(consumed_lines, [])

        readline = self.get_readline((b'# coding: bad\n',))
        self.assertRaises(SyntaxError, detect_encoding, readline)

    def test_false_encoding(self):
        # Issue 18873: "Encoding" detected in non-comment lines
        readline = self.get_readline((b'print("#coding=fake")',))
        encoding, consumed_lines = detect_encoding(readline)
        self.assertEqual(encoding, 'utf-8')
        self.assertEqual(consumed_lines, [b'print("#coding=fake")'])

    def test_open(self):
        filename = support.TESTFN + '.py'
        self.addCleanup(support.unlink, filename)

        # test coding cookie
        for encoding in ('iso-8859-15', 'utf-8'):
            with open(filename, 'w', encoding=encoding) as fp:
                print("# coding: %s" % encoding, file=fp)
                print("print('euro:\u20ac')", file=fp)
            with tokenize_open(filename) as fp:
                self.assertEqual(fp.encoding, encoding)
                self.assertEqual(fp.mode, 'r')

        # test BOM (no coding cookie)
        with open(filename, 'w', encoding='utf-8-sig') as fp:
            print("print('euro:\u20ac')", file=fp)
        with tokenize_open(filename) as fp:
            self.assertEqual(fp.encoding, 'utf-8-sig')
            self.assertEqual(fp.mode, 'r')

    def test_filename_in_exception(self):
        # When possible, include the file name in the exception.
        path = 'some_file_path'
        lines = (
            b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
        )
        class Bunk:
            def __init__(self, lines, path):
                self.name = path
                self._lines = lines
                self._index = 0

            def readline(self):
                if self._index == len(lines):
                    raise StopIteration
                line = lines[self._index]
                self._index += 1
                return line

        with self.assertRaises(SyntaxError):
            ins = Bunk(lines, path)
            # Make sure lacking a name isn't an issue.
            del ins.name
            detect_encoding(ins.readline)
        with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):
            ins = Bunk(lines, path)
            detect_encoding(ins.readline)

    def test_open_error(self):
        # Issue #23840: open() must close the binary file on error
        m = BytesIO(b'#coding:xxx')
        with mock.patch('tokenize._builtin_open', return_value=m):
            self.assertRaises(SyntaxError, tokenize_open, 'foobar')
        self.assertTrue(m.closed)


class TestTokenize(TestCase):

    def test_tokenize(self):
        import tokenize as tokenize_module
        encoding = object()
        encoding_used = None
        def mock_detect_encoding(readline):
            return encoding, [b'first', b'second']

        def mock__tokenize(readline, encoding):
            nonlocal encoding_used
            encoding_used = encoding
            out = []
            while True:
                next_line = readline()
                if next_line:
                    out.append(next_line)
                    continue
                return out

        counter = 0
        def mock_readline():
            nonlocal counter
            counter += 1
            if counter == 5:
                return b''
            return str(counter).encode()

        orig_detect_encoding = tokenize_module.detect_encoding
        orig__tokenize = tokenize_module._tokenize
        tokenize_module.detect_encoding = mock_detect_encoding
        tokenize_module._tokenize = mock__tokenize
        try:
            results = tokenize(mock_readline)
            self.assertEqual(list(results),
                             [b'first', b'second', b'1', b'2', b'3', b'4'])
        finally:
            tokenize_module.detect_encoding = orig_detect_encoding
            tokenize_module._tokenize = orig__tokenize

        self.assertTrue(encoding_used, encoding)
    def test_oneline_defs(self):
        buf = []
        for i in range(500):
            buf.append('def i{i}(): return {i}'.format(i=i))
        buf.append('OK')
        buf = '\n'.join(buf)

        # Test that 500 consecutive, one-line defs are OK
        toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline))
        self.assertEqual(toks[-2].string, 'OK')  # [-1] is always ENDMARKER

    def assertExactTypeEqual(self, opstr, *optypes):
        tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))
        num_optypes = len(optypes)
        self.assertEqual(len(tokens), 2 + num_optypes)
        self.assertEqual(token.tok_name[tokens[0].exact_type],
                         token.tok_name[ENCODING])
        for i in range(num_optypes):
            self.assertEqual(token.tok_name[tokens[i + 1].exact_type],
                             token.tok_name[optypes[i]])
        self.assertEqual(token.tok_name[tokens[1 + num_optypes].exact_type],
                         token.tok_name[token.ENDMARKER])

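    # Added note: .exact_type distinguishes the individual operator tokens
    # (PLUS, MINUS, LPAR, ...) that the plain .type attribute lumps together
    # as OP; the checks below walk that mapping one operator at a time.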
1316 def test_exact_type(self):
1317 self.assertExactTypeEqual('()', token.LPAR, token.RPAR)
1318 self.assertExactTypeEqual('[]', token.LSQB, token.RSQB)
1319 self.assertExactTypeEqual(':', token.COLON)
1320 self.assertExactTypeEqual(',', token.COMMA)
1321 self.assertExactTypeEqual(';', token.SEMI)
1322 self.assertExactTypeEqual('+', token.PLUS)
1323 self.assertExactTypeEqual('-', token.MINUS)
1324 self.assertExactTypeEqual('*', token.STAR)
1325 self.assertExactTypeEqual('/', token.SLASH)
1326 self.assertExactTypeEqual('|', token.VBAR)
1327 self.assertExactTypeEqual('&', token.AMPER)
1328 self.assertExactTypeEqual('<', token.LESS)
1329 self.assertExactTypeEqual('>', token.GREATER)
1330 self.assertExactTypeEqual('=', token.EQUAL)
1331 self.assertExactTypeEqual('.', token.DOT)
1332 self.assertExactTypeEqual('%', token.PERCENT)
1333 self.assertExactTypeEqual('{}', token.LBRACE, token.RBRACE)
1334 self.assertExactTypeEqual('==', token.EQEQUAL)
1335 self.assertExactTypeEqual('!=', token.NOTEQUAL)
1336 self.assertExactTypeEqual('<=', token.LESSEQUAL)
1337 self.assertExactTypeEqual('>=', token.GREATEREQUAL)
1338 self.assertExactTypeEqual('~', token.TILDE)
1339 self.assertExactTypeEqual('^', token.CIRCUMFLEX)
1340 self.assertExactTypeEqual('<<', token.LEFTSHIFT)
1341 self.assertExactTypeEqual('>>', token.RIGHTSHIFT)
1342 self.assertExactTypeEqual('**', token.DOUBLESTAR)
1343 self.assertExactTypeEqual('+=', token.PLUSEQUAL)
1344 self.assertExactTypeEqual('-=', token.MINEQUAL)
1345 self.assertExactTypeEqual('*=', token.STAREQUAL)
1346 self.assertExactTypeEqual('/=', token.SLASHEQUAL)
1347 self.assertExactTypeEqual('%=', token.PERCENTEQUAL)
1348 self.assertExactTypeEqual('&=', token.AMPEREQUAL)
1349 self.assertExactTypeEqual('|=', token.VBAREQUAL)
1350 self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)
1351 self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)
        self.assertExactTypeEqual('<<=', token.LEFTSHIFTEQUAL)
        self.assertExactTypeEqual('>>=', token.RIGHTSHIFTEQUAL)
        self.assertExactTypeEqual('**=', token.DOUBLESTAREQUAL)
        self.assertExactTypeEqual('//', token.DOUBLESLASH)
        self.assertExactTypeEqual('//=', token.DOUBLESLASHEQUAL)
        self.assertExactTypeEqual('@', token.AT)
        self.assertExactTypeEqual('@=', token.ATEQUAL)

        self.assertExactTypeEqual('a**2+b**2==c**2',
                                  NAME, token.DOUBLESTAR, NUMBER,
                                  token.PLUS,
                                  NAME, token.DOUBLESTAR, NUMBER,
                                  token.EQEQUAL,
                                  NAME, token.DOUBLESTAR, NUMBER)
        self.assertExactTypeEqual('{1, 2, 3}',
                                  token.LBRACE,
                                  token.NUMBER, token.COMMA,
                                  token.NUMBER, token.COMMA,
                                  token.NUMBER,
                                  token.RBRACE)
        self.assertExactTypeEqual('^(x & 0x1)',
                                  token.CIRCUMFLEX,
                                  token.LPAR,
                                  token.NAME, token.AMPER, token.NUMBER,
                                  token.RPAR)

    def test_pathological_trailing_whitespace(self):
        # See http://bugs.python.org/issue16152
        self.assertExactTypeEqual('@ ', token.AT)
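
    # An illustrative sketch (an addition): for operators, .type stays
    # the generic OP while .exact_type carries the specific operator
    # token that assertExactTypeEqual() checks above.
    def test_exact_type_vs_type_sketch(self):
        toks = list(tokenize(BytesIO(b'+').readline))
        plus = toks[1]  # toks[0] is ENCODING, toks[-1] is ENDMARKER
        self.assertEqual(plus.type, OP)
        self.assertEqual(plus.exact_type, token.PLUS)
        self.assertEqual(plus.string, '+')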


class UntokenizeTest(TestCase):

    def test_bad_input_order(self):
        # raise if the start row precedes the previous end row
        u = Untokenizer()
        u.prev_row = 2
        u.prev_col = 2
        with self.assertRaises(ValueError) as cm:
            u.add_whitespace((1,3))
        self.assertEqual(cm.exception.args[0],
                         'start (1,3) precedes previous end (2,2)')
        # raise if the start column precedes the previous end column in the same row
        self.assertRaises(ValueError, u.add_whitespace, (2,1))

    def test_backslash_continuation(self):
        # A <whitespace>\<newline> continuation leaves no token behind,
        # so add_whitespace must re-create it from positions alone.
        u = Untokenizer()
        u.prev_row = 1
        u.prev_col = 1
        u.tokens = []
        u.add_whitespace((2, 0))
        self.assertEqual(u.tokens, ['\\\n'])
        u.prev_row = 2
        u.add_whitespace((4, 4))
        self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', '    '])
        TestRoundtrip.check_roundtrip(self, 'a\n b\n c\n \\\n c\n')
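
    # An illustrative sketch (an addition): within a single row no
    # continuation is needed, so add_whitespace() pads with spaces only.
    def test_add_whitespace_same_row_sketch(self):
        u = Untokenizer()
        u.prev_row = 1
        u.prev_col = 1
        u.tokens = []
        u.add_whitespace((1, 4))
        self.assertEqual(u.tokens, ['   '])  # three spaces, no backslash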

    def test_iter_compat(self):
        u = Untokenizer()
        token = (NAME, 'Hello')  # shadows the token module, but only locally
        tokens = [(ENCODING, 'utf-8'), token]
        u.compat(token, iter([]))
        self.assertEqual(u.tokens, ["Hello "])
        u = Untokenizer()
        self.assertEqual(u.untokenize(iter([token])), 'Hello ')
        u = Untokenizer()
        self.assertEqual(u.untokenize(iter(tokens)), 'Hello ')
        self.assertEqual(u.encoding, 'utf-8')
        self.assertEqual(untokenize(iter(tokens)), b'Hello ')


class TestRoundtrip(TestCase):

    def check_roundtrip(self, f):
        """
        Test roundtrip for `untokenize`. `f` is an open file or a string.
        The source code in f is tokenized to both 5- and 2-tuples.
        Both sequences are converted back to source code via
        tokenize.untokenize(), and the latter tokenized again to 2-tuples.
        The test fails if any of the three 2-tuple tokenizations differ.

        Once the remaining untokenize bugs are fixed, untokenizing
        5-tuples should reproduce source that contains no backslash
        continuation following spaces; a proper test should then verify
        this.
        """
        # Get source code and original tokenizations
        if isinstance(f, str):
            code = f.encode('utf-8')
        else:
            code = f.read()
            f.close()
        readline = iter(code.splitlines(keepends=True)).__next__
        tokens5 = list(tokenize(readline))
        tokens2 = [tok[:2] for tok in tokens5]
        # Reproduce tokens2 from pairs
        bytes_from2 = untokenize(tokens2)
        readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__
        tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]
        self.assertEqual(tokens2_from2, tokens2)
        # Reproduce tokens2 from 5-tuples
        bytes_from5 = untokenize(tokens5)
        readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__
        tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]
        self.assertEqual(tokens2_from5, tokens2)
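
    # An illustrative sketch (an addition): the 5-tuple/2-tuple
    # distinction described in the docstring above, shown on one token.
    def test_token_tuple_shapes_sketch(self):
        toks = list(tokenize(BytesIO(b"x = 1\n").readline))
        name_tok = toks[1]                  # the NAME token for 'x'
        self.assertEqual(len(name_tok), 5)  # type, string, start, end, line
        self.assertEqual(name_tok[:2], (NAME, 'x'))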

    def test_roundtrip(self):
        # There are some standard formatting practices that are easy to get right.

        self.check_roundtrip("if x == 1:\n"
                             "    print(x)\n")
        self.check_roundtrip("# This is a comment\n"
                             "# This also")

        # Some people use different formatting conventions, which makes
        # untokenize a little trickier.  Note that this source has trailing
        # whitespace after the colon; it must survive the roundtrip.

        self.check_roundtrip("if x == 1 : \n"
                             "    print(x)\n")
        fn = support.findfile("tokenize_tests.txt")
        with open(fn, 'rb') as f:
            self.check_roundtrip(f)
        self.check_roundtrip("if x == 1:\n"
                             "    # A comment by itself.\n"
                             "    print(x) # Comment here, too.\n"
                             "    # Another comment.\n"
                             "after_if = True\n")
        self.check_roundtrip("if (x # The comments need to go in the right place\n"
                             "    == 1):\n"
                             "    print('x==1')\n")
        self.check_roundtrip("class Test: # A comment here\n"
                             "  # A comment with weird indent\n"
                             "  after_com = 5\n"
                             "  def x(m): return m*5 # a one liner\n"
                             "  def y(m): # A whitespace after the colon\n"
                             "     return y*4 # 3-space indent\n")

        # Some error-handling code.  Note that the paren closing the
        # print() call lands at the start of the next source line.
        self.check_roundtrip("try: import somemodule\n"
                             "except ImportError: # comment\n"
                             "    print('Can not import' # comment2\n"
                             ")else: print('Loaded')\n")

    def test_continuation(self):
        # Balancing continuation
        self.check_roundtrip("a = (3,4, \n"
                             "5,6)\n"
                             "y = [3, 4,\n"
                             "5]\n"
                             "z = {'a': 5,\n"
                             "'b':15, 'c':True}\n"
                             "x = len(y) + 5 - a[\n"
                             "3] - a[2]\n"
                             "+ len(z) - z[\n"
                             "'b']\n")

    def test_backslash_continuation(self):
        # Backslash means line continuation, except for comments
        self.check_roundtrip("x=1+\\\n"
                             "1\n"
                             "# This is a comment\\\n"
                             "# This also\n")
        self.check_roundtrip("# Comment \\\n"
                             "x = 0")

    def test_string_concatenation(self):
        # Two string literals on the same line
        self.check_roundtrip("'' ''")

    def test_random_files(self):
        # Test roundtrip on random python modules.
        # Pass the '-ucpu' option to process the full directory.

        import glob, random
        fn = support.findfile("tokenize_tests.txt")
        tempdir = os.path.dirname(fn) or os.curdir
        testfiles = glob.glob(os.path.join(tempdir, "test*.py"))

        # Tokenize is broken on test_pep3131.py because regular expressions are
        # broken on the obscure unicode identifiers in it. *sigh*
        # With roundtrip extended to test the 5-tuple mode of untokenize,
        # 7 more testfiles fail.  Remove them also until the failure is diagnosed.

        testfiles.remove(os.path.join(tempdir, "test_pep3131.py"))
        for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'):
            testfiles.remove(os.path.join(tempdir, "test_%s.py" % f))

        if not support.is_resource_enabled("cpu"):
            testfiles = random.sample(testfiles, 10)

        for testfile in testfiles:
            with open(testfile, 'rb') as f:
                with self.subTest(file=testfile):
                    self.check_roundtrip(f)

    def roundtrip(self, code):
        # Tokenize `code` and untokenize the full token stream back to text.
        if isinstance(code, str):
            code = code.encode('utf-8')
        return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')

    def test_indentation_semantics_retained(self):
        """
        Ensure that although whitespace might be mutated in a roundtrip,
        the semantic meaning of the indentation remains consistent.
        """
        code = "if False:\n\tx=3\n\tx=3\n"
        codelines = self.roundtrip(code).split('\n')
        self.assertEqual(codelines[1], codelines[2])
        self.check_roundtrip(code)


if __name__ == "__main__":
    import unittest  # unittest itself is not imported at module level
    unittest.main()