from test import support
from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
                     STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
                     open as tokenize_open, Untokenizer)
from io import BytesIO
from unittest import TestCase, mock
import unittest  # needed by unittest.main() at the bottom of the file
import os
import token


class TokenizeTest(TestCase):
    # Tests for the tokenize module.

    # The tests can be really simple. Given a small fragment of source
    # code, print out a table with tokens. The ENDMARKER is omitted for
    # brevity.

    def check_tokenize(self, s, expected):
        # Format the tokens in s in a table format.
        # The ENDMARKER is omitted.
        result = []
        f = BytesIO(s.encode('utf-8'))
        for type, token, start, end, line in tokenize(f.readline):
            if type == ENDMARKER:
                break
            type = tok_name[type]
            result.append("    %(type)-10.10s %(token)-13.13r %(start)s %(end)s" %
                          locals())
        self.assertEqual(result,
                         ["    ENCODING   'utf-8'       (0, 0) (0, 0)"] +
                         expected.rstrip().splitlines())

33 def test_basic(self):
34 self.check_tokenize("1 + 1", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +000035 NUMBER '1' (1, 0) (1, 1)
36 OP '+' (1, 2) (1, 3)
37 NUMBER '1' (1, 4) (1, 5)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +030038 """)
39 self.check_tokenize("if False:\n"
40 " # NL\n"
41 " True = False # NEWLINE\n", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +000042 NAME 'if' (1, 0) (1, 2)
43 NAME 'False' (1, 3) (1, 8)
44 OP ':' (1, 8) (1, 9)
45 NEWLINE '\\n' (1, 9) (1, 10)
46 COMMENT '# NL' (2, 4) (2, 8)
47 NL '\\n' (2, 8) (2, 9)
48 INDENT ' ' (3, 0) (3, 4)
49 NAME 'True' (3, 4) (3, 8)
50 OP '=' (3, 9) (3, 10)
51 NAME 'False' (3, 11) (3, 16)
52 COMMENT '# NEWLINE' (3, 17) (3, 26)
53 NEWLINE '\\n' (3, 26) (3, 27)
54 DEDENT '' (4, 0) (4, 0)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +030055 """)
56 indent_error_file = b"""\
57def k(x):
58 x += 2
59 x += 5
60"""
61 readline = BytesIO(indent_error_file).readline
62 with self.assertRaisesRegex(IndentationError,
63 "unindent does not match any "
64 "outer indentation level"):
65 for tok in tokenize(readline):
66 pass
Thomas Wouters89f507f2006-12-13 04:49:30 +000067
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +030068 def test_int(self):
69 # Ordinary integers and binary operators
70 self.check_tokenize("0xff <= 255", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +000071 NUMBER '0xff' (1, 0) (1, 4)
72 OP '<=' (1, 5) (1, 7)
73 NUMBER '255' (1, 8) (1, 11)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +030074 """)
75 self.check_tokenize("0b10 <= 255", """\
Eric Smith74ca5572008-03-17 19:49:19 +000076 NUMBER '0b10' (1, 0) (1, 4)
77 OP '<=' (1, 5) (1, 7)
78 NUMBER '255' (1, 8) (1, 11)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +030079 """)
80 self.check_tokenize("0o123 <= 0O123", """\
Eric Smith74ca5572008-03-17 19:49:19 +000081 NUMBER '0o123' (1, 0) (1, 5)
82 OP '<=' (1, 6) (1, 8)
83 NUMBER '0O123' (1, 9) (1, 14)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +030084 """)
85 self.check_tokenize("1234567 > ~0x15", """\
Mark Dickinson0c1f7c02008-03-16 05:05:12 +000086 NUMBER '1234567' (1, 0) (1, 7)
87 OP '>' (1, 8) (1, 9)
88 OP '~' (1, 10) (1, 11)
89 NUMBER '0x15' (1, 11) (1, 15)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +030090 """)
91 self.check_tokenize("2134568 != 1231515", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +000092 NUMBER '2134568' (1, 0) (1, 7)
93 OP '!=' (1, 8) (1, 10)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +000094 NUMBER '1231515' (1, 11) (1, 18)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +030095 """)
96 self.check_tokenize("(-124561-1) & 200000000", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +000097 OP '(' (1, 0) (1, 1)
98 OP '-' (1, 1) (1, 2)
99 NUMBER '124561' (1, 2) (1, 8)
100 OP '-' (1, 8) (1, 9)
101 NUMBER '1' (1, 9) (1, 10)
102 OP ')' (1, 10) (1, 11)
103 OP '&' (1, 12) (1, 13)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000104 NUMBER '200000000' (1, 14) (1, 23)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300105 """)
106 self.check_tokenize("0xdeadbeef != -1", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000107 NUMBER '0xdeadbeef' (1, 0) (1, 10)
108 OP '!=' (1, 11) (1, 13)
109 OP '-' (1, 14) (1, 15)
110 NUMBER '1' (1, 15) (1, 16)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300111 """)
112 self.check_tokenize("0xdeadc0de & 12345", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000113 NUMBER '0xdeadc0de' (1, 0) (1, 10)
114 OP '&' (1, 11) (1, 12)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000115 NUMBER '12345' (1, 13) (1, 18)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300116 """)
117 self.check_tokenize("0xFF & 0x15 | 1234", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000118 NUMBER '0xFF' (1, 0) (1, 4)
119 OP '&' (1, 5) (1, 6)
120 NUMBER '0x15' (1, 7) (1, 11)
121 OP '|' (1, 12) (1, 13)
122 NUMBER '1234' (1, 14) (1, 18)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300123 """)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000124
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300125 def test_long(self):
126 # Long integers
127 self.check_tokenize("x = 0", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000128 NAME 'x' (1, 0) (1, 1)
129 OP '=' (1, 2) (1, 3)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000130 NUMBER '0' (1, 4) (1, 5)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300131 """)
132 self.check_tokenize("x = 0xfffffffffff", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000133 NAME 'x' (1, 0) (1, 1)
134 OP '=' (1, 2) (1, 3)
135 NUMBER '0xffffffffff (1, 4) (1, 17)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300136 """)
137 self.check_tokenize("x = 123141242151251616110", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000138 NAME 'x' (1, 0) (1, 1)
139 OP '=' (1, 2) (1, 3)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000140 NUMBER '123141242151 (1, 4) (1, 25)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300141 """)
142 self.check_tokenize("x = -15921590215012591", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000143 NAME 'x' (1, 0) (1, 1)
144 OP '=' (1, 2) (1, 3)
145 OP '-' (1, 4) (1, 5)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000146 NUMBER '159215902150 (1, 5) (1, 22)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300147 """)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000148
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300149 def test_float(self):
150 # Floating point numbers
151 self.check_tokenize("x = 3.14159", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000152 NAME 'x' (1, 0) (1, 1)
153 OP '=' (1, 2) (1, 3)
154 NUMBER '3.14159' (1, 4) (1, 11)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300155 """)
156 self.check_tokenize("x = 314159.", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000157 NAME 'x' (1, 0) (1, 1)
158 OP '=' (1, 2) (1, 3)
159 NUMBER '314159.' (1, 4) (1, 11)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300160 """)
161 self.check_tokenize("x = .314159", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000162 NAME 'x' (1, 0) (1, 1)
163 OP '=' (1, 2) (1, 3)
164 NUMBER '.314159' (1, 4) (1, 11)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300165 """)
166 self.check_tokenize("x = 3e14159", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000167 NAME 'x' (1, 0) (1, 1)
168 OP '=' (1, 2) (1, 3)
169 NUMBER '3e14159' (1, 4) (1, 11)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300170 """)
171 self.check_tokenize("x = 3E123", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000172 NAME 'x' (1, 0) (1, 1)
173 OP '=' (1, 2) (1, 3)
174 NUMBER '3E123' (1, 4) (1, 9)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300175 """)
176 self.check_tokenize("x+y = 3e-1230", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000177 NAME 'x' (1, 0) (1, 1)
178 OP '+' (1, 1) (1, 2)
179 NAME 'y' (1, 2) (1, 3)
180 OP '=' (1, 4) (1, 5)
181 NUMBER '3e-1230' (1, 6) (1, 13)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300182 """)
183 self.check_tokenize("x = 3.14e159", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000184 NAME 'x' (1, 0) (1, 1)
185 OP '=' (1, 2) (1, 3)
186 NUMBER '3.14e159' (1, 4) (1, 12)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300187 """)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000188
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300189 def test_string(self):
190 # String literals
191 self.check_tokenize("x = ''; y = \"\"", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000192 NAME 'x' (1, 0) (1, 1)
193 OP '=' (1, 2) (1, 3)
194 STRING "''" (1, 4) (1, 6)
195 OP ';' (1, 6) (1, 7)
196 NAME 'y' (1, 8) (1, 9)
197 OP '=' (1, 10) (1, 11)
198 STRING '""' (1, 12) (1, 14)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300199 """)
200 self.check_tokenize("x = '\"'; y = \"'\"", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000201 NAME 'x' (1, 0) (1, 1)
202 OP '=' (1, 2) (1, 3)
203 STRING '\\'"\\'' (1, 4) (1, 7)
204 OP ';' (1, 7) (1, 8)
205 NAME 'y' (1, 9) (1, 10)
206 OP '=' (1, 11) (1, 12)
207 STRING '"\\'"' (1, 13) (1, 16)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300208 """)
209 self.check_tokenize("x = \"doesn't \"shrink\", does it\"", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000210 NAME 'x' (1, 0) (1, 1)
211 OP '=' (1, 2) (1, 3)
212 STRING '"doesn\\'t "' (1, 4) (1, 14)
213 NAME 'shrink' (1, 14) (1, 20)
214 STRING '", does it"' (1, 20) (1, 31)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300215 """)
216 self.check_tokenize("x = 'abc' + 'ABC'", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000217 NAME 'x' (1, 0) (1, 1)
218 OP '=' (1, 2) (1, 3)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000219 STRING "'abc'" (1, 4) (1, 9)
220 OP '+' (1, 10) (1, 11)
221 STRING "'ABC'" (1, 12) (1, 17)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300222 """)
223 self.check_tokenize('y = "ABC" + "ABC"', """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000224 NAME 'y' (1, 0) (1, 1)
225 OP '=' (1, 2) (1, 3)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000226 STRING '"ABC"' (1, 4) (1, 9)
227 OP '+' (1, 10) (1, 11)
228 STRING '"ABC"' (1, 12) (1, 17)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300229 """)
230 self.check_tokenize("x = r'abc' + r'ABC' + R'ABC' + R'ABC'", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000231 NAME 'x' (1, 0) (1, 1)
232 OP '=' (1, 2) (1, 3)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000233 STRING "r'abc'" (1, 4) (1, 10)
234 OP '+' (1, 11) (1, 12)
235 STRING "r'ABC'" (1, 13) (1, 19)
236 OP '+' (1, 20) (1, 21)
237 STRING "R'ABC'" (1, 22) (1, 28)
238 OP '+' (1, 29) (1, 30)
239 STRING "R'ABC'" (1, 31) (1, 37)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300240 """)
241 self.check_tokenize('y = r"abc" + r"ABC" + R"ABC" + R"ABC"', """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000242 NAME 'y' (1, 0) (1, 1)
243 OP '=' (1, 2) (1, 3)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000244 STRING 'r"abc"' (1, 4) (1, 10)
245 OP '+' (1, 11) (1, 12)
246 STRING 'r"ABC"' (1, 13) (1, 19)
247 OP '+' (1, 20) (1, 21)
248 STRING 'R"ABC"' (1, 22) (1, 28)
249 OP '+' (1, 29) (1, 30)
250 STRING 'R"ABC"' (1, 31) (1, 37)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300251 """)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000252
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300253 self.check_tokenize("u'abc' + U'abc'", """\
Meador Inge8d5c0b82012-06-16 21:49:08 -0500254 STRING "u'abc'" (1, 0) (1, 6)
255 OP '+' (1, 7) (1, 8)
256 STRING "U'abc'" (1, 9) (1, 15)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300257 """)
258 self.check_tokenize('u"abc" + U"abc"', """\
Meador Inge8d5c0b82012-06-16 21:49:08 -0500259 STRING 'u"abc"' (1, 0) (1, 6)
260 OP '+' (1, 7) (1, 8)
261 STRING 'U"abc"' (1, 9) (1, 15)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300262 """)
Meador Inge8d5c0b82012-06-16 21:49:08 -0500263
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300264 self.check_tokenize("b'abc' + B'abc'", """\
Meador Inge8d5c0b82012-06-16 21:49:08 -0500265 STRING "b'abc'" (1, 0) (1, 6)
266 OP '+' (1, 7) (1, 8)
267 STRING "B'abc'" (1, 9) (1, 15)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300268 """)
269 self.check_tokenize('b"abc" + B"abc"', """\
Meador Inge8d5c0b82012-06-16 21:49:08 -0500270 STRING 'b"abc"' (1, 0) (1, 6)
271 OP '+' (1, 7) (1, 8)
272 STRING 'B"abc"' (1, 9) (1, 15)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300273 """)
274 self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\
Meador Inge8d5c0b82012-06-16 21:49:08 -0500275 STRING "br'abc'" (1, 0) (1, 7)
276 OP '+' (1, 8) (1, 9)
277 STRING "bR'abc'" (1, 10) (1, 17)
278 OP '+' (1, 18) (1, 19)
279 STRING "Br'abc'" (1, 20) (1, 27)
280 OP '+' (1, 28) (1, 29)
281 STRING "BR'abc'" (1, 30) (1, 37)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300282 """)
283 self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\
Meador Inge8d5c0b82012-06-16 21:49:08 -0500284 STRING 'br"abc"' (1, 0) (1, 7)
285 OP '+' (1, 8) (1, 9)
286 STRING 'bR"abc"' (1, 10) (1, 17)
287 OP '+' (1, 18) (1, 19)
288 STRING 'Br"abc"' (1, 20) (1, 27)
289 OP '+' (1, 28) (1, 29)
290 STRING 'BR"abc"' (1, 30) (1, 37)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300291 """)
292 self.check_tokenize("rb'abc' + rB'abc' + Rb'abc' + RB'abc'", """\
Meador Inge8d5c0b82012-06-16 21:49:08 -0500293 STRING "rb'abc'" (1, 0) (1, 7)
294 OP '+' (1, 8) (1, 9)
295 STRING "rB'abc'" (1, 10) (1, 17)
296 OP '+' (1, 18) (1, 19)
297 STRING "Rb'abc'" (1, 20) (1, 27)
298 OP '+' (1, 28) (1, 29)
299 STRING "RB'abc'" (1, 30) (1, 37)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300300 """)
301 self.check_tokenize('rb"abc" + rB"abc" + Rb"abc" + RB"abc"', """\
Meador Inge8d5c0b82012-06-16 21:49:08 -0500302 STRING 'rb"abc"' (1, 0) (1, 7)
303 OP '+' (1, 8) (1, 9)
304 STRING 'rB"abc"' (1, 10) (1, 17)
305 OP '+' (1, 18) (1, 19)
306 STRING 'Rb"abc"' (1, 20) (1, 27)
307 OP '+' (1, 28) (1, 29)
308 STRING 'RB"abc"' (1, 30) (1, 37)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300309 """)
Meador Inge8d5c0b82012-06-16 21:49:08 -0500310
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300311 def test_function(self):
312 self.check_tokenize("def d22(a, b, c=2, d=2, *k): pass", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000313 NAME 'def' (1, 0) (1, 3)
314 NAME 'd22' (1, 4) (1, 7)
315 OP '(' (1, 7) (1, 8)
316 NAME 'a' (1, 8) (1, 9)
317 OP ',' (1, 9) (1, 10)
318 NAME 'b' (1, 11) (1, 12)
319 OP ',' (1, 12) (1, 13)
320 NAME 'c' (1, 14) (1, 15)
321 OP '=' (1, 15) (1, 16)
322 NUMBER '2' (1, 16) (1, 17)
323 OP ',' (1, 17) (1, 18)
324 NAME 'd' (1, 19) (1, 20)
325 OP '=' (1, 20) (1, 21)
326 NUMBER '2' (1, 21) (1, 22)
327 OP ',' (1, 22) (1, 23)
328 OP '*' (1, 24) (1, 25)
329 NAME 'k' (1, 25) (1, 26)
330 OP ')' (1, 26) (1, 27)
331 OP ':' (1, 27) (1, 28)
332 NAME 'pass' (1, 29) (1, 33)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300333 """)
334 self.check_tokenize("def d01v_(a=1, *k, **w): pass", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000335 NAME 'def' (1, 0) (1, 3)
336 NAME 'd01v_' (1, 4) (1, 9)
337 OP '(' (1, 9) (1, 10)
338 NAME 'a' (1, 10) (1, 11)
339 OP '=' (1, 11) (1, 12)
340 NUMBER '1' (1, 12) (1, 13)
341 OP ',' (1, 13) (1, 14)
342 OP '*' (1, 15) (1, 16)
343 NAME 'k' (1, 16) (1, 17)
344 OP ',' (1, 17) (1, 18)
345 OP '**' (1, 19) (1, 21)
346 NAME 'w' (1, 21) (1, 22)
347 OP ')' (1, 22) (1, 23)
348 OP ':' (1, 23) (1, 24)
349 NAME 'pass' (1, 25) (1, 29)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300350 """)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000351
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300352 def test_comparison(self):
353 # Comparison
354 self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != "
355 "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000356 NAME 'if' (1, 0) (1, 2)
357 NUMBER '1' (1, 3) (1, 4)
358 OP '<' (1, 5) (1, 6)
359 NUMBER '1' (1, 7) (1, 8)
360 OP '>' (1, 9) (1, 10)
361 NUMBER '1' (1, 11) (1, 12)
362 OP '==' (1, 13) (1, 15)
363 NUMBER '1' (1, 16) (1, 17)
364 OP '>=' (1, 18) (1, 20)
365 NUMBER '5' (1, 21) (1, 22)
366 OP '<=' (1, 23) (1, 25)
367 NUMBER '0x15' (1, 26) (1, 30)
368 OP '<=' (1, 31) (1, 33)
369 NUMBER '0x12' (1, 34) (1, 38)
370 OP '!=' (1, 39) (1, 41)
371 NUMBER '1' (1, 42) (1, 43)
372 NAME 'and' (1, 44) (1, 47)
373 NUMBER '5' (1, 48) (1, 49)
374 NAME 'in' (1, 50) (1, 52)
375 NUMBER '1' (1, 53) (1, 54)
376 NAME 'not' (1, 55) (1, 58)
377 NAME 'in' (1, 59) (1, 61)
378 NUMBER '1' (1, 62) (1, 63)
379 NAME 'is' (1, 64) (1, 66)
380 NUMBER '1' (1, 67) (1, 68)
381 NAME 'or' (1, 69) (1, 71)
382 NUMBER '5' (1, 72) (1, 73)
383 NAME 'is' (1, 74) (1, 76)
384 NAME 'not' (1, 77) (1, 80)
385 NUMBER '1' (1, 81) (1, 82)
386 OP ':' (1, 82) (1, 83)
387 NAME 'pass' (1, 84) (1, 88)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300388 """)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000389
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300390 def test_shift(self):
391 # Shift
392 self.check_tokenize("x = 1 << 1 >> 5", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000393 NAME 'x' (1, 0) (1, 1)
394 OP '=' (1, 2) (1, 3)
395 NUMBER '1' (1, 4) (1, 5)
396 OP '<<' (1, 6) (1, 8)
397 NUMBER '1' (1, 9) (1, 10)
398 OP '>>' (1, 11) (1, 13)
399 NUMBER '5' (1, 14) (1, 15)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300400 """)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000401
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300402 def test_additive(self):
403 # Additive
404 self.check_tokenize("x = 1 - y + 15 - 1 + 0x124 + z + a[5]", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000405 NAME 'x' (1, 0) (1, 1)
406 OP '=' (1, 2) (1, 3)
407 NUMBER '1' (1, 4) (1, 5)
408 OP '-' (1, 6) (1, 7)
409 NAME 'y' (1, 8) (1, 9)
410 OP '+' (1, 10) (1, 11)
411 NUMBER '15' (1, 12) (1, 14)
412 OP '-' (1, 15) (1, 16)
Mark Dickinson0c1f7c02008-03-16 05:05:12 +0000413 NUMBER '1' (1, 17) (1, 18)
414 OP '+' (1, 19) (1, 20)
415 NUMBER '0x124' (1, 21) (1, 26)
416 OP '+' (1, 27) (1, 28)
417 NAME 'z' (1, 29) (1, 30)
418 OP '+' (1, 31) (1, 32)
419 NAME 'a' (1, 33) (1, 34)
420 OP '[' (1, 34) (1, 35)
421 NUMBER '5' (1, 35) (1, 36)
422 OP ']' (1, 36) (1, 37)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300423 """)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000424
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300425 def test_multiplicative(self):
426 # Multiplicative
427 self.check_tokenize("x = 1//1*1/5*12%0x12", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000428 NAME 'x' (1, 0) (1, 1)
429 OP '=' (1, 2) (1, 3)
430 NUMBER '1' (1, 4) (1, 5)
431 OP '//' (1, 5) (1, 7)
432 NUMBER '1' (1, 7) (1, 8)
433 OP '*' (1, 8) (1, 9)
434 NUMBER '1' (1, 9) (1, 10)
435 OP '/' (1, 10) (1, 11)
436 NUMBER '5' (1, 11) (1, 12)
437 OP '*' (1, 12) (1, 13)
438 NUMBER '12' (1, 13) (1, 15)
439 OP '%' (1, 15) (1, 16)
440 NUMBER '0x12' (1, 16) (1, 20)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300441 """)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000442
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300443 def test_unary(self):
444 # Unary
445 self.check_tokenize("~1 ^ 1 & 1 |1 ^ -1", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000446 OP '~' (1, 0) (1, 1)
447 NUMBER '1' (1, 1) (1, 2)
448 OP '^' (1, 3) (1, 4)
449 NUMBER '1' (1, 5) (1, 6)
450 OP '&' (1, 7) (1, 8)
451 NUMBER '1' (1, 9) (1, 10)
452 OP '|' (1, 11) (1, 12)
453 NUMBER '1' (1, 12) (1, 13)
454 OP '^' (1, 14) (1, 15)
455 OP '-' (1, 16) (1, 17)
456 NUMBER '1' (1, 17) (1, 18)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300457 """)
458 self.check_tokenize("-1*1/1+1*1//1 - ---1**1", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000459 OP '-' (1, 0) (1, 1)
460 NUMBER '1' (1, 1) (1, 2)
461 OP '*' (1, 2) (1, 3)
462 NUMBER '1' (1, 3) (1, 4)
463 OP '/' (1, 4) (1, 5)
464 NUMBER '1' (1, 5) (1, 6)
465 OP '+' (1, 6) (1, 7)
466 NUMBER '1' (1, 7) (1, 8)
467 OP '*' (1, 8) (1, 9)
468 NUMBER '1' (1, 9) (1, 10)
469 OP '//' (1, 10) (1, 12)
470 NUMBER '1' (1, 12) (1, 13)
471 OP '-' (1, 14) (1, 15)
472 OP '-' (1, 16) (1, 17)
473 OP '-' (1, 17) (1, 18)
474 OP '-' (1, 18) (1, 19)
475 NUMBER '1' (1, 19) (1, 20)
476 OP '**' (1, 20) (1, 22)
477 NUMBER '1' (1, 22) (1, 23)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300478 """)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000479
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300480 def test_selector(self):
481 # Selector
482 self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000483 NAME 'import' (1, 0) (1, 6)
484 NAME 'sys' (1, 7) (1, 10)
485 OP ',' (1, 10) (1, 11)
486 NAME 'time' (1, 12) (1, 16)
487 NEWLINE '\\n' (1, 16) (1, 17)
488 NAME 'x' (2, 0) (2, 1)
489 OP '=' (2, 2) (2, 3)
490 NAME 'sys' (2, 4) (2, 7)
491 OP '.' (2, 7) (2, 8)
492 NAME 'modules' (2, 8) (2, 15)
493 OP '[' (2, 15) (2, 16)
494 STRING "'time'" (2, 16) (2, 22)
495 OP ']' (2, 22) (2, 23)
496 OP '.' (2, 23) (2, 24)
497 NAME 'time' (2, 24) (2, 28)
498 OP '(' (2, 28) (2, 29)
499 OP ')' (2, 29) (2, 30)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300500 """)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000501
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300502 def test_method(self):
503 # Methods
504 self.check_tokenize("@staticmethod\ndef foo(x,y): pass", """\
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000505 OP '@' (1, 0) (1, 1)
506 NAME 'staticmethod (1, 1) (1, 13)
507 NEWLINE '\\n' (1, 13) (1, 14)
508 NAME 'def' (2, 0) (2, 3)
509 NAME 'foo' (2, 4) (2, 7)
510 OP '(' (2, 7) (2, 8)
511 NAME 'x' (2, 8) (2, 9)
512 OP ',' (2, 9) (2, 10)
513 NAME 'y' (2, 10) (2, 11)
514 OP ')' (2, 11) (2, 12)
515 OP ':' (2, 12) (2, 13)
516 NAME 'pass' (2, 14) (2, 18)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300517 """)
Christian Heimesdd15f6c2008-03-16 00:07:10 +0000518
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300519 def test_tabs(self):
520 # Evil tabs
521 self.check_tokenize("def f():\n"
522 "\tif x\n"
523 " \tpass", """\
Benjamin Petersona0dfa822009-11-13 02:25:08 +0000524 NAME 'def' (1, 0) (1, 3)
525 NAME 'f' (1, 4) (1, 5)
526 OP '(' (1, 5) (1, 6)
527 OP ')' (1, 6) (1, 7)
528 OP ':' (1, 7) (1, 8)
529 NEWLINE '\\n' (1, 8) (1, 9)
530 INDENT '\\t' (2, 0) (2, 1)
531 NAME 'if' (2, 1) (2, 3)
532 NAME 'x' (2, 4) (2, 5)
533 NEWLINE '\\n' (2, 5) (2, 6)
534 INDENT ' \\t' (3, 0) (3, 9)
535 NAME 'pass' (3, 9) (3, 13)
536 DEDENT '' (4, 0) (4, 0)
537 DEDENT '' (4, 0) (4, 0)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300538 """)
Benjamin Peterson33856de2010-08-30 14:41:20 +0000539
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300540 def test_non_ascii_identifiers(self):
541 # Non-ascii identifiers
542 self.check_tokenize("Örter = 'places'\ngrün = 'green'", """\
Benjamin Peterson33856de2010-08-30 14:41:20 +0000543 NAME 'Örter' (1, 0) (1, 5)
544 OP '=' (1, 6) (1, 7)
545 STRING "'places'" (1, 8) (1, 16)
546 NEWLINE '\\n' (1, 16) (1, 17)
547 NAME 'grün' (2, 0) (2, 4)
548 OP '=' (2, 5) (2, 6)
549 STRING "'green'" (2, 7) (2, 14)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300550 """)
Armin Ronacherc0eaeca2012-03-04 13:07:57 +0000551
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300552 def test_unicode(self):
553 # Legacy unicode literals:
554 self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\
Armin Ronacherc0eaeca2012-03-04 13:07:57 +0000555 NAME 'Örter' (1, 0) (1, 5)
556 OP '=' (1, 6) (1, 7)
557 STRING "u'places'" (1, 8) (1, 17)
558 NEWLINE '\\n' (1, 17) (1, 18)
559 NAME 'grün' (2, 0) (2, 4)
560 OP '=' (2, 5) (2, 6)
Christian Heimes0b3847d2012-06-20 11:17:58 +0200561 STRING "U'green'" (2, 7) (2, 15)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300562 """)
Thomas Wouters89f507f2006-12-13 04:49:30 +0000563
Raymond Hettinger68c04532005-06-10 11:05:19 +0000564
Raymond Hettinger68c04532005-06-10 11:05:19 +0000565def decistmt(s):
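    """Substitute Decimal constructor calls for float literals in the string
    of statements `s`, using tokenize()/untokenize().  NUMBER tokens that
    contain a '.' are rewritten; e.g. (see TestMisc.test_decistmt below)
    '+21.3e-5*-.1234/81.7' becomes
    "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')".
    """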
Raymond Hettinger68c04532005-06-10 11:05:19 +0000566 result = []
Trent Nelson428de652008-03-18 22:41:35 +0000567 g = tokenize(BytesIO(s.encode('utf-8')).readline) # tokenize the string
Raymond Hettinger68c04532005-06-10 11:05:19 +0000568 for toknum, tokval, _, _, _ in g:
569 if toknum == NUMBER and '.' in tokval: # replace NUMBER tokens
570 result.extend([
571 (NAME, 'Decimal'),
572 (OP, '('),
573 (STRING, repr(tokval)),
574 (OP, ')')
575 ])
576 else:
577 result.append((toknum, tokval))
Trent Nelson428de652008-03-18 22:41:35 +0000578 return untokenize(result).decode('utf-8')
579
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300580class TestMisc(TestCase):
581
582 def test_decistmt(self):
583 # Substitute Decimals for floats in a string of statements.
584 # This is an example from the docs.
585
586 from decimal import Decimal
587 s = '+21.3e-5*-.1234/81.7'
588 self.assertEqual(decistmt(s),
589 "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')")
590
591 # The format of the exponent is inherited from the platform C library.
592 # Known cases are "e-007" (Windows) and "e-07" (not Windows). Since
593 # we're only showing 11 digits, and the 12th isn't close to 5, the
594 # rest of the output should be platform-independent.
595 self.assertRegex(repr(eval(s)), '-3.2171603427[0-9]*e-0+7')
596
597 # Output from calculations with Decimal should be identical across all
598 # platforms.
599 self.assertEqual(eval(decistmt(s)),
600 Decimal('-3.217160342717258261933904529E-7'))
601
Trent Nelson428de652008-03-18 22:41:35 +0000602
603class TestTokenizerAdheresToPep0263(TestCase):
604 """
605 Test that tokenizer adheres to the coding behaviour stipulated in PEP 0263.
606 """
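    # (A coding cookie is a comment such as "# -*- coding: latin-1 -*-" on
    #  one of the first two lines of the file; see the data files used below.)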
607
608 def _testFile(self, filename):
609 path = os.path.join(os.path.dirname(__file__), filename)
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300610 TestRoundtrip.check_roundtrip(self, open(path, 'rb'))
Trent Nelson428de652008-03-18 22:41:35 +0000611
612 def test_utf8_coding_cookie_and_no_utf8_bom(self):
Ned Deily2ea6fcc2011-07-19 16:15:27 -0700613 f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt'
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300614 self._testFile(f)
Trent Nelson428de652008-03-18 22:41:35 +0000615
616 def test_latin1_coding_cookie_and_utf8_bom(self):
617 """
618 As per PEP 0263, if a file starts with a utf-8 BOM signature, the only
619 allowed encoding for the comment is 'utf-8'. The text file used in
620 this test starts with a BOM signature, but specifies latin1 as the
621 coding, so verify that a SyntaxError is raised, which matches the
622 behaviour of the interpreter when it encounters a similar condition.
623 """
624 f = 'tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt'
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000625 self.assertRaises(SyntaxError, self._testFile, f)
Trent Nelson428de652008-03-18 22:41:35 +0000626
627 def test_no_coding_cookie_and_utf8_bom(self):
628 f = 'tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt'
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300629 self._testFile(f)
Trent Nelson428de652008-03-18 22:41:35 +0000630
631 def test_utf8_coding_cookie_and_utf8_bom(self):
632 f = 'tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt'
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +0300633 self._testFile(f)
Trent Nelson428de652008-03-18 22:41:35 +0000634
Florent Xicluna11f0b412012-07-07 12:13:35 +0200635 def test_bad_coding_cookie(self):
636 self.assertRaises(SyntaxError, self._testFile, 'bad_coding.py')
637 self.assertRaises(SyntaxError, self._testFile, 'bad_coding2.py')
638
Trent Nelson428de652008-03-18 22:41:35 +0000639
640class Test_Tokenize(TestCase):
641
642 def test__tokenize_decodes_with_specified_encoding(self):
643 literal = '"ЉЊЈЁЂ"'
644 line = literal.encode('utf-8')
645 first = False
646 def readline():
647 nonlocal first
648 if not first:
649 first = True
650 return line
651 else:
652 return b''
653
654 # skip the initial encoding token and the end token
655 tokens = list(_tokenize(readline, encoding='utf-8'))[1:-1]
656 expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
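        # (3 is token.STRING; the tuple fields are type, string, start, end, line.)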
Ezio Melottib3aedd42010-11-20 19:04:17 +0000657 self.assertEqual(tokens, expected_tokens,
658 "bytes not decoded with encoding")
Trent Nelson428de652008-03-18 22:41:35 +0000659
660 def test__tokenize_does_not_decode_with_encoding_none(self):
661 literal = '"ЉЊЈЁЂ"'
662 first = False
663 def readline():
664 nonlocal first
665 if not first:
666 first = True
667 return literal
668 else:
669 return b''
670
671 # skip the end token
672 tokens = list(_tokenize(readline, encoding=None))[:-1]
673 expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
Ezio Melottib3aedd42010-11-20 19:04:17 +0000674 self.assertEqual(tokens, expected_tokens,
675 "string not tokenized when encoding is None")
Trent Nelson428de652008-03-18 22:41:35 +0000676
677
678class TestDetectEncoding(TestCase):
679
680 def get_readline(self, lines):
681 index = 0
682 def readline():
683 nonlocal index
684 if index == len(lines):
685 raise StopIteration
686 line = lines[index]
687 index += 1
688 return line
689 return readline
690
691 def test_no_bom_no_encoding_cookie(self):
692 lines = (
693 b'# something\n',
694 b'print(something)\n',
695 b'do_something(else)\n'
696 )
697 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
Ezio Melottib3aedd42010-11-20 19:04:17 +0000698 self.assertEqual(encoding, 'utf-8')
699 self.assertEqual(consumed_lines, list(lines[:2]))
Trent Nelson428de652008-03-18 22:41:35 +0000700
701 def test_bom_no_cookie(self):
702 lines = (
703 b'\xef\xbb\xbf# something\n',
704 b'print(something)\n',
705 b'do_something(else)\n'
706 )
707 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
Ezio Melottib3aedd42010-11-20 19:04:17 +0000708 self.assertEqual(encoding, 'utf-8-sig')
709 self.assertEqual(consumed_lines,
710 [b'# something\n', b'print(something)\n'])
Trent Nelson428de652008-03-18 22:41:35 +0000711
712 def test_cookie_first_line_no_bom(self):
713 lines = (
714 b'# -*- coding: latin-1 -*-\n',
715 b'print(something)\n',
716 b'do_something(else)\n'
717 )
718 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
Ezio Melottib3aedd42010-11-20 19:04:17 +0000719 self.assertEqual(encoding, 'iso-8859-1')
720 self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n'])
Trent Nelson428de652008-03-18 22:41:35 +0000721
722 def test_matched_bom_and_cookie_first_line(self):
723 lines = (
724 b'\xef\xbb\xbf# coding=utf-8\n',
725 b'print(something)\n',
726 b'do_something(else)\n'
727 )
728 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
Ezio Melottib3aedd42010-11-20 19:04:17 +0000729 self.assertEqual(encoding, 'utf-8-sig')
730 self.assertEqual(consumed_lines, [b'# coding=utf-8\n'])
Trent Nelson428de652008-03-18 22:41:35 +0000731
732 def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self):
733 lines = (
734 b'\xef\xbb\xbf# vim: set fileencoding=ascii :\n',
735 b'print(something)\n',
736 b'do_something(else)\n'
737 )
738 readline = self.get_readline(lines)
739 self.assertRaises(SyntaxError, detect_encoding, readline)
740
741 def test_cookie_second_line_no_bom(self):
742 lines = (
743 b'#! something\n',
744 b'# vim: set fileencoding=ascii :\n',
745 b'print(something)\n',
746 b'do_something(else)\n'
747 )
748 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
Ezio Melottib3aedd42010-11-20 19:04:17 +0000749 self.assertEqual(encoding, 'ascii')
Trent Nelson428de652008-03-18 22:41:35 +0000750 expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n']
Ezio Melottib3aedd42010-11-20 19:04:17 +0000751 self.assertEqual(consumed_lines, expected)
Trent Nelson428de652008-03-18 22:41:35 +0000752
753 def test_matched_bom_and_cookie_second_line(self):
754 lines = (
755 b'\xef\xbb\xbf#! something\n',
756 b'f# coding=utf-8\n',
757 b'print(something)\n',
758 b'do_something(else)\n'
759 )
760 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
Ezio Melottib3aedd42010-11-20 19:04:17 +0000761 self.assertEqual(encoding, 'utf-8-sig')
762 self.assertEqual(consumed_lines,
763 [b'#! something\n', b'f# coding=utf-8\n'])
Trent Nelson428de652008-03-18 22:41:35 +0000764
765 def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self):
766 lines = (
767 b'\xef\xbb\xbf#! something\n',
768 b'# vim: set fileencoding=ascii :\n',
769 b'print(something)\n',
770 b'do_something(else)\n'
771 )
772 readline = self.get_readline(lines)
773 self.assertRaises(SyntaxError, detect_encoding, readline)
774
Serhiy Storchaka768c16c2014-01-09 18:36:09 +0200775 def test_cookie_second_line_noncommented_first_line(self):
776 lines = (
777 b"print('\xc2\xa3')\n",
778 b'# vim: set fileencoding=iso8859-15 :\n',
779 b"print('\xe2\x82\xac')\n"
780 )
781 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
782 self.assertEqual(encoding, 'utf-8')
783 expected = [b"print('\xc2\xa3')\n"]
784 self.assertEqual(consumed_lines, expected)
785
786 def test_cookie_second_line_commented_first_line(self):
787 lines = (
788 b"#print('\xc2\xa3')\n",
789 b'# vim: set fileencoding=iso8859-15 :\n',
790 b"print('\xe2\x82\xac')\n"
791 )
792 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
793 self.assertEqual(encoding, 'iso8859-15')
794 expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n']
795 self.assertEqual(consumed_lines, expected)
796
797 def test_cookie_second_line_empty_first_line(self):
798 lines = (
799 b'\n',
800 b'# vim: set fileencoding=iso8859-15 :\n',
801 b"print('\xe2\x82\xac')\n"
802 )
803 encoding, consumed_lines = detect_encoding(self.get_readline(lines))
804 self.assertEqual(encoding, 'iso8859-15')
805 expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']
806 self.assertEqual(consumed_lines, expected)
807
Benjamin Petersond3afada2009-10-09 21:43:09 +0000808 def test_latin1_normalization(self):
809 # See get_normal_name() in tokenizer.c.
810 encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",
811 "iso-8859-1-unix", "iso-latin-1-mac")
812 for encoding in encodings:
813 for rep in ("-", "_"):
814 enc = encoding.replace("-", rep)
815 lines = (b"#!/usr/bin/python\n",
816 b"# coding: " + enc.encode("ascii") + b"\n",
817 b"print(things)\n",
818 b"do_something += 4\n")
819 rl = self.get_readline(lines)
820 found, consumed_lines = detect_encoding(rl)
Ezio Melottib3aedd42010-11-20 19:04:17 +0000821 self.assertEqual(found, "iso-8859-1")
Benjamin Petersond3afada2009-10-09 21:43:09 +0000822
Martin v. Löwis63674f42012-04-20 14:36:47 +0200823 def test_syntaxerror_latin1(self):
824 # Issue 14629: need to raise SyntaxError if the first
825 # line(s) have non-UTF-8 characters
826 lines = (
827 b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
828 )
829 readline = self.get_readline(lines)
830 self.assertRaises(SyntaxError, detect_encoding, readline)
831
832
Benjamin Petersond3afada2009-10-09 21:43:09 +0000833 def test_utf8_normalization(self):
834 # See get_normal_name() in tokenizer.c.
835 encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
836 for encoding in encodings:
837 for rep in ("-", "_"):
838 enc = encoding.replace("-", rep)
839 lines = (b"#!/usr/bin/python\n",
840 b"# coding: " + enc.encode("ascii") + b"\n",
841 b"1 + 3\n")
842 rl = self.get_readline(lines)
843 found, consumed_lines = detect_encoding(rl)
Ezio Melottib3aedd42010-11-20 19:04:17 +0000844 self.assertEqual(found, "utf-8")
Benjamin Petersond3afada2009-10-09 21:43:09 +0000845
Trent Nelson428de652008-03-18 22:41:35 +0000846 def test_short_files(self):
847 readline = self.get_readline((b'print(something)\n',))
848 encoding, consumed_lines = detect_encoding(readline)
Ezio Melottib3aedd42010-11-20 19:04:17 +0000849 self.assertEqual(encoding, 'utf-8')
850 self.assertEqual(consumed_lines, [b'print(something)\n'])
Trent Nelson428de652008-03-18 22:41:35 +0000851
852 encoding, consumed_lines = detect_encoding(self.get_readline(()))
Ezio Melottib3aedd42010-11-20 19:04:17 +0000853 self.assertEqual(encoding, 'utf-8')
854 self.assertEqual(consumed_lines, [])
Trent Nelson428de652008-03-18 22:41:35 +0000855
856 readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',))
857 encoding, consumed_lines = detect_encoding(readline)
Ezio Melottib3aedd42010-11-20 19:04:17 +0000858 self.assertEqual(encoding, 'utf-8-sig')
859 self.assertEqual(consumed_lines, [b'print(something)\n'])
Trent Nelson428de652008-03-18 22:41:35 +0000860
861 readline = self.get_readline((b'\xef\xbb\xbf',))
862 encoding, consumed_lines = detect_encoding(readline)
Ezio Melottib3aedd42010-11-20 19:04:17 +0000863 self.assertEqual(encoding, 'utf-8-sig')
864 self.assertEqual(consumed_lines, [])
Trent Nelson428de652008-03-18 22:41:35 +0000865
Benjamin Peterson433f32c2008-12-12 01:25:05 +0000866 readline = self.get_readline((b'# coding: bad\n',))
867 self.assertRaises(SyntaxError, detect_encoding, readline)
Trent Nelson428de652008-03-18 22:41:35 +0000868
Serhiy Storchakadafea852013-09-16 23:51:56 +0300869 def test_false_encoding(self):
870 # Issue 18873: "Encoding" detected in non-comment lines
871 readline = self.get_readline((b'print("#coding=fake")',))
872 encoding, consumed_lines = detect_encoding(readline)
873 self.assertEqual(encoding, 'utf-8')
874 self.assertEqual(consumed_lines, [b'print("#coding=fake")'])
875
Victor Stinner58c07522010-11-09 01:08:59 +0000876 def test_open(self):
877 filename = support.TESTFN + '.py'
878 self.addCleanup(support.unlink, filename)
879
880 # test coding cookie
881 for encoding in ('iso-8859-15', 'utf-8'):
882 with open(filename, 'w', encoding=encoding) as fp:
883 print("# coding: %s" % encoding, file=fp)
884 print("print('euro:\u20ac')", file=fp)
885 with tokenize_open(filename) as fp:
Victor Stinner92665ab2010-11-09 01:11:31 +0000886 self.assertEqual(fp.encoding, encoding)
887 self.assertEqual(fp.mode, 'r')
Victor Stinner58c07522010-11-09 01:08:59 +0000888
889 # test BOM (no coding cookie)
890 with open(filename, 'w', encoding='utf-8-sig') as fp:
891 print("print('euro:\u20ac')", file=fp)
892 with tokenize_open(filename) as fp:
Victor Stinner92665ab2010-11-09 01:11:31 +0000893 self.assertEqual(fp.encoding, 'utf-8-sig')
894 self.assertEqual(fp.mode, 'r')
Victor Stinner58c07522010-11-09 01:08:59 +0000895
Brett Cannonc33f3f22012-04-20 13:23:54 -0400896 def test_filename_in_exception(self):
897 # When possible, include the file name in the exception.
898 path = 'some_file_path'
899 lines = (
900 b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
901 )
902 class Bunk:
903 def __init__(self, lines, path):
904 self.name = path
905 self._lines = lines
906 self._index = 0
907
908 def readline(self):
909 if self._index == len(lines):
910 raise StopIteration
911 line = lines[self._index]
912 self._index += 1
913 return line
914
915 with self.assertRaises(SyntaxError):
916 ins = Bunk(lines, path)
917 # Make sure lacking a name isn't an issue.
918 del ins.name
919 detect_encoding(ins.readline)
920 with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):
921 ins = Bunk(lines, path)
922 detect_encoding(ins.readline)
923
Victor Stinner387729e2015-05-26 00:43:58 +0200924 def test_open_error(self):
925 # Issue #23840: open() must close the binary file on error
926 m = BytesIO(b'#coding:xxx')
927 with mock.patch('tokenize._builtin_open', return_value=m):
928 self.assertRaises(SyntaxError, tokenize_open, 'foobar')
929 self.assertTrue(m.closed)
930
931
Trent Nelson428de652008-03-18 22:41:35 +0000932class TestTokenize(TestCase):
933
934 def test_tokenize(self):
935 import tokenize as tokenize_module
936 encoding = object()
937 encoding_used = None
938 def mock_detect_encoding(readline):
Serhiy Storchaka74a49ac2015-03-20 16:46:19 +0200939 return encoding, [b'first', b'second']
Trent Nelson428de652008-03-18 22:41:35 +0000940
941 def mock__tokenize(readline, encoding):
942 nonlocal encoding_used
943 encoding_used = encoding
944 out = []
945 while True:
946 next_line = readline()
947 if next_line:
948 out.append(next_line)
949 continue
950 return out
951
952 counter = 0
953 def mock_readline():
954 nonlocal counter
955 counter += 1
956 if counter == 5:
957 return b''
Serhiy Storchaka74a49ac2015-03-20 16:46:19 +0200958 return str(counter).encode()
Trent Nelson428de652008-03-18 22:41:35 +0000959
960 orig_detect_encoding = tokenize_module.detect_encoding
961 orig__tokenize = tokenize_module._tokenize
962 tokenize_module.detect_encoding = mock_detect_encoding
963 tokenize_module._tokenize = mock__tokenize
964 try:
965 results = tokenize(mock_readline)
Serhiy Storchaka74a49ac2015-03-20 16:46:19 +0200966 self.assertEqual(list(results),
967 [b'first', b'second', b'1', b'2', b'3', b'4'])
Trent Nelson428de652008-03-18 22:41:35 +0000968 finally:
969 tokenize_module.detect_encoding = orig_detect_encoding
970 tokenize_module._tokenize = orig__tokenize
971
        self.assertEqual(encoding_used, encoding)
Raymond Hettinger68c04532005-06-10 11:05:19 +0000973
Meador Inge00c7f852012-01-19 00:44:45 -0600974 def assertExactTypeEqual(self, opstr, *optypes):
975 tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))
976 num_optypes = len(optypes)
977 self.assertEqual(len(tokens), 2 + num_optypes)
978 self.assertEqual(token.tok_name[tokens[0].exact_type],
979 token.tok_name[ENCODING])
980 for i in range(num_optypes):
981 self.assertEqual(token.tok_name[tokens[i + 1].exact_type],
982 token.tok_name[optypes[i]])
983 self.assertEqual(token.tok_name[tokens[1 + num_optypes].exact_type],
984 token.tok_name[token.ENDMARKER])
985
986 def test_exact_type(self):
987 self.assertExactTypeEqual('()', token.LPAR, token.RPAR)
988 self.assertExactTypeEqual('[]', token.LSQB, token.RSQB)
989 self.assertExactTypeEqual(':', token.COLON)
990 self.assertExactTypeEqual(',', token.COMMA)
991 self.assertExactTypeEqual(';', token.SEMI)
992 self.assertExactTypeEqual('+', token.PLUS)
993 self.assertExactTypeEqual('-', token.MINUS)
994 self.assertExactTypeEqual('*', token.STAR)
995 self.assertExactTypeEqual('/', token.SLASH)
996 self.assertExactTypeEqual('|', token.VBAR)
997 self.assertExactTypeEqual('&', token.AMPER)
998 self.assertExactTypeEqual('<', token.LESS)
999 self.assertExactTypeEqual('>', token.GREATER)
1000 self.assertExactTypeEqual('=', token.EQUAL)
1001 self.assertExactTypeEqual('.', token.DOT)
1002 self.assertExactTypeEqual('%', token.PERCENT)
1003 self.assertExactTypeEqual('{}', token.LBRACE, token.RBRACE)
1004 self.assertExactTypeEqual('==', token.EQEQUAL)
1005 self.assertExactTypeEqual('!=', token.NOTEQUAL)
1006 self.assertExactTypeEqual('<=', token.LESSEQUAL)
1007 self.assertExactTypeEqual('>=', token.GREATEREQUAL)
1008 self.assertExactTypeEqual('~', token.TILDE)
1009 self.assertExactTypeEqual('^', token.CIRCUMFLEX)
1010 self.assertExactTypeEqual('<<', token.LEFTSHIFT)
1011 self.assertExactTypeEqual('>>', token.RIGHTSHIFT)
1012 self.assertExactTypeEqual('**', token.DOUBLESTAR)
1013 self.assertExactTypeEqual('+=', token.PLUSEQUAL)
1014 self.assertExactTypeEqual('-=', token.MINEQUAL)
1015 self.assertExactTypeEqual('*=', token.STAREQUAL)
1016 self.assertExactTypeEqual('/=', token.SLASHEQUAL)
1017 self.assertExactTypeEqual('%=', token.PERCENTEQUAL)
1018 self.assertExactTypeEqual('&=', token.AMPEREQUAL)
1019 self.assertExactTypeEqual('|=', token.VBAREQUAL)
1020 self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)
1021 self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)
1022 self.assertExactTypeEqual('<<=', token.LEFTSHIFTEQUAL)
1023 self.assertExactTypeEqual('>>=', token.RIGHTSHIFTEQUAL)
1024 self.assertExactTypeEqual('**=', token.DOUBLESTAREQUAL)
1025 self.assertExactTypeEqual('//', token.DOUBLESLASH)
1026 self.assertExactTypeEqual('//=', token.DOUBLESLASHEQUAL)
1027 self.assertExactTypeEqual('@', token.AT)
1028
1029 self.assertExactTypeEqual('a**2+b**2==c**2',
1030 NAME, token.DOUBLESTAR, NUMBER,
1031 token.PLUS,
1032 NAME, token.DOUBLESTAR, NUMBER,
1033 token.EQEQUAL,
1034 NAME, token.DOUBLESTAR, NUMBER)
1035 self.assertExactTypeEqual('{1, 2, 3}',
1036 token.LBRACE,
1037 token.NUMBER, token.COMMA,
1038 token.NUMBER, token.COMMA,
1039 token.NUMBER,
1040 token.RBRACE)
1041 self.assertExactTypeEqual('^(x & 0x1)',
1042 token.CIRCUMFLEX,
1043 token.LPAR,
1044 token.NAME, token.AMPER, token.NUMBER,
1045 token.RPAR)
Christian Heimesdd15f6c2008-03-16 00:07:10 +00001046
Ezio Melottifafa8b72012-11-03 17:46:51 +02001047 def test_pathological_trailing_whitespace(self):
1048 # See http://bugs.python.org/issue16152
1049 self.assertExactTypeEqual('@ ', token.AT)
Christian Heimesdd15f6c2008-03-16 00:07:10 +00001050
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +03001051
Terry Jan Reedy5e6db312014-02-17 16:45:48 -05001052class UntokenizeTest(TestCase):
Terry Jan Reedy58edfd92014-02-17 16:49:06 -05001053
Terry Jan Reedy5e6db312014-02-17 16:45:48 -05001054 def test_bad_input_order(self):
        # raise if the start position is on an earlier row than the previous token's end
Terry Jan Reedy5e6db312014-02-17 16:45:48 -05001056 u = Untokenizer()
1057 u.prev_row = 2
1058 u.prev_col = 2
1059 with self.assertRaises(ValueError) as cm:
1060 u.add_whitespace((1,3))
Terry Jan Reedy58edfd92014-02-17 16:49:06 -05001061 self.assertEqual(cm.exception.args[0],
Terry Jan Reedy5e6db312014-02-17 16:45:48 -05001062 'start (1,3) precedes previous end (2,2)')
Terry Jan Reedy9dc3a362014-02-23 23:33:08 -05001063 # raise if previous column in row
Terry Jan Reedy5e6db312014-02-17 16:45:48 -05001064 self.assertRaises(ValueError, u.add_whitespace, (2,1))
1065
Terry Jan Reedy9dc3a362014-02-23 23:33:08 -05001066 def test_backslash_continuation(self):
1067 # The problem is that <whitespace>\<newline> leaves no token
1068 u = Untokenizer()
1069 u.prev_row = 1
1070 u.prev_col = 1
1071 u.tokens = []
1072 u.add_whitespace((2, 0))
1073 self.assertEqual(u.tokens, ['\\\n'])
1074 u.prev_row = 2
1075 u.add_whitespace((4, 4))
1076 self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', ' '])
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +03001077 TestRoundtrip.check_roundtrip(self, 'a\n b\n c\n \\\n c\n')
Terry Jan Reedy9dc3a362014-02-23 23:33:08 -05001078
Terry Jan Reedy5b8d2c32014-02-17 23:12:16 -05001079 def test_iter_compat(self):
1080 u = Untokenizer()
1081 token = (NAME, 'Hello')
1082 tokens = [(ENCODING, 'utf-8'), token]
1083 u.compat(token, iter([]))
1084 self.assertEqual(u.tokens, ["Hello "])
1085 u = Untokenizer()
1086 self.assertEqual(u.untokenize(iter([token])), 'Hello ')
1087 u = Untokenizer()
1088 self.assertEqual(u.untokenize(iter(tokens)), 'Hello ')
1089 self.assertEqual(u.encoding, 'utf-8')
1090 self.assertEqual(untokenize(iter(tokens)), b'Hello ')
1091
Terry Jan Reedy5e6db312014-02-17 16:45:48 -05001092
Jason R. Coombs5713b3c2015-06-20 19:52:22 -04001093class TestRoundtrip(TestCase):
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +03001094
1095 def check_roundtrip(self, f):
1096 """
1097 Test roundtrip for `untokenize`. `f` is an open file or a string.
1098 The source code in f is tokenized to both 5- and 2-tuples.
1099 Both sequences are converted back to source code via
1100 tokenize.untokenize(), and the latter tokenized again to 2-tuples.
1101 The test fails if the 3 pair tokenizations do not match.
1102
1103 When untokenize bugs are fixed, untokenize with 5-tuples should
1104 reproduce code that does not contain a backslash continuation
1105 following spaces. A proper test should test this.
1106 """
1107 # Get source code and original tokenizations
1108 if isinstance(f, str):
1109 code = f.encode('utf-8')
1110 else:
1111 code = f.read()
1112 f.close()
1113 readline = iter(code.splitlines(keepends=True)).__next__
1114 tokens5 = list(tokenize(readline))
1115 tokens2 = [tok[:2] for tok in tokens5]
1116 # Reproduce tokens2 from pairs
1117 bytes_from2 = untokenize(tokens2)
1118 readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__
1119 tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]
1120 self.assertEqual(tokens2_from2, tokens2)
1121 # Reproduce tokens2 from 5-tuples
1122 bytes_from5 = untokenize(tokens5)
1123 readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__
1124 tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]
1125 self.assertEqual(tokens2_from5, tokens2)
1126
1127 def test_roundtrip(self):
1128 # There are some standard formatting practices that are easy to get right.
1129
1130 self.check_roundtrip("if x == 1:\n"
1131 " print(x)\n")
1132 self.check_roundtrip("# This is a comment\n"
1133 "# This also")
1134
1135 # Some people use different formatting conventions, which makes
1136 # untokenize a little trickier. Note that this test involves trailing
1137 # whitespace after the colon. Note that we use hex escapes to make the
1138 # two trailing blanks apparent in the expected output.
1139
1140 self.check_roundtrip("if x == 1 : \n"
1141 " print(x)\n")
1142 fn = support.findfile("tokenize_tests.txt")
1143 with open(fn, 'rb') as f:
1144 self.check_roundtrip(f)
1145 self.check_roundtrip("if x == 1:\n"
1146 " # A comment by itself.\n"
1147 " print(x) # Comment here, too.\n"
1148 " # Another comment.\n"
1149 "after_if = True\n")
1150 self.check_roundtrip("if (x # The comments need to go in the right place\n"
1151 " == 1):\n"
1152 " print('x==1')\n")
1153 self.check_roundtrip("class Test: # A comment here\n"
1154 " # A comment with weird indent\n"
1155 " after_com = 5\n"
1156 " def x(m): return m*5 # a one liner\n"
1157 " def y(m): # A whitespace after the colon\n"
1158 " return y*4 # 3-space indent\n")
1159
1160 # Some error-handling code
1161 self.check_roundtrip("try: import somemodule\n"
1162 "except ImportError: # comment\n"
1163 " print('Can not import' # comment2\n)"
1164 "else: print('Loaded')\n")
1165
1166 def test_continuation(self):
1167 # Balancing continuation
1168 self.check_roundtrip("a = (3,4, \n"
1169 "5,6)\n"
1170 "y = [3, 4,\n"
1171 "5]\n"
1172 "z = {'a': 5,\n"
1173 "'b':15, 'c':True}\n"
1174 "x = len(y) + 5 - a[\n"
1175 "3] - a[2]\n"
1176 "+ len(z) - z[\n"
1177 "'b']\n")
1178
1179 def test_backslash_continuation(self):
1180 # Backslash means line continuation, except for comments
1181 self.check_roundtrip("x=1+\\\n"
1182 "1\n"
1183 "# This is a comment\\\n"
1184 "# This also\n")
1185 self.check_roundtrip("# Comment \\\n"
1186 "x = 0")
1187
1188 def test_string_concatenation(self):
1189 # Two string literals on the same line
1190 self.check_roundtrip("'' ''")
1191
1192 def test_random_files(self):
1193 # Test roundtrip on random python modules.
1194 # pass the '-ucpu' option to process the full directory.
1195
1196 import glob, random
1197 fn = support.findfile("tokenize_tests.txt")
1198 tempdir = os.path.dirname(fn) or os.curdir
1199 testfiles = glob.glob(os.path.join(tempdir, "test*.py"))
1200
1201 # Tokenize is broken on test_pep3131.py because regular expressions are
1202 # broken on the obscure unicode identifiers in it. *sigh*
1203 # With roundtrip extended to test the 5-tuple mode of untokenize,
1204 # 7 more testfiles fail. Remove them also until the failure is diagnosed.
1205
1206 testfiles.remove(os.path.join(tempdir, "test_pep3131.py"))
1207 for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'):
1208 testfiles.remove(os.path.join(tempdir, "test_%s.py") % f)
1209
1210 if not support.is_resource_enabled("cpu"):
1211 testfiles = random.sample(testfiles, 10)
1212
1213 for testfile in testfiles:
1214 with open(testfile, 'rb') as f:
1215 with self.subTest(file=testfile):
1216 self.check_roundtrip(f)
1217
1218
Jason R. Coombs5713b3c2015-06-20 19:52:22 -04001219 def roundtrip(self, code):
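        """Untokenize the tokenization of `code` (str or bytes) and return
        the regenerated source decoded as UTF-8."""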
1220 if isinstance(code, str):
1221 code = code.encode('utf-8')
Jason R. Coombsb6d1cdd2015-06-25 22:42:24 -04001222 return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')
Jason R. Coombs5713b3c2015-06-20 19:52:22 -04001223
1224 def test_indentation_semantics_retained(self):
1225 """
1226 Ensure that although whitespace might be mutated in a roundtrip,
1227 the semantic meaning of the indentation remains consistent.
1228 """
1229 code = "if False:\n\tx=3\n\tx=3\n"
Jason R. Coombsb6d1cdd2015-06-25 22:42:24 -04001230 codelines = self.roundtrip(code).split('\n')
Jason R. Coombs5713b3c2015-06-20 19:52:22 -04001231 self.assertEqual(codelines[1], codelines[2])
Serhiy Storchaka5f6fa822015-10-06 18:16:28 +03001232 self.check_roundtrip(code)
Jason R. Coombs5713b3c2015-06-20 19:52:22 -04001233
1234
if __name__ == "__main__":
    unittest.main()