Blame - Lib/test/test_tokenize.py - platform/external/python/cpython3

2008-03-16 00:07:10 +0000

[diff] [blame]

1

doctests = """

2

Tests for the tokenize module.

Thomas Wouters

2006-12-13 04:49:30 +0000

[diff] [blame]

3

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

4

The tests can be really simple. Given a small fragment of source

Terry Jan Reedy

2014-02-23 23:33:08 -0500

[diff] [blame]

5

code, print out a table with tokens. The ENDMARKER is omitted for

Thomas Wouters

2006-12-13 04:49:30 +0000

[diff] [blame]

6

brevity.

7

Jason R. Coombs

7cf3638

2015-06-20 19:13:50 -0400

[diff] [blame]

8

>>> import glob

9

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

10

>>> dump_tokens("1 + 1")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

11

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

12

NUMBER '1' (1, 0) (1, 1)

13

OP '+' (1, 2) (1, 3)

14

NUMBER '1' (1, 4) (1, 5)

Thomas Wouters

2006-12-13 04:49:30 +0000

[diff] [blame]

15

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

16

>>> dump_tokens("if False:\\n"

17

... " # NL\\n"

18

... " True = False # NEWLINE\\n")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

19

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

20

NAME 'if' (1, 0) (1, 2)

21

NAME 'False' (1, 3) (1, 8)

22

OP ':' (1, 8) (1, 9)

23

NEWLINE '\\n' (1, 9) (1, 10)

24

COMMENT '# NL' (2, 4) (2, 8)

25

NL '\\n' (2, 8) (2, 9)

26

INDENT ' ' (3, 0) (3, 4)

27

NAME 'True' (3, 4) (3, 8)

28

OP '=' (3, 9) (3, 10)

29

NAME 'False' (3, 11) (3, 16)

30

COMMENT '# NEWLINE' (3, 17) (3, 26)

31

NEWLINE '\\n' (3, 26) (3, 27)

32

DEDENT '' (4, 0) (4, 0)

Thomas Wouters

2006-12-13 04:49:30 +0000

[diff] [blame]

33

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

34

>>> indent_error_file = \"""

... def k(x):

... x += 2

... x += 5

... \"""

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

39

>>> readline = BytesIO(indent_error_file.encode('utf-8')).readline

40

>>> for tok in tokenize(readline): pass

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

41

Traceback (most recent call last):

42

...

43

IndentationError: unindent does not match any outer indentation level

Thomas Wouters

2006-12-13 04:49:30 +0000

[diff] [blame]

44

Mark Dickinson

3c0b317

2010-06-29 07:38:37 +0000

[diff] [blame]

45

There are some standard formatting practices that are easy to get right.

Thomas Wouters

2006-12-13 04:49:30 +0000

[diff] [blame]

46

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

47

>>> roundtrip("if x == 1:\\n"

48

... " print(x)\\n")

49

True

Thomas Wouters

2006-12-13 04:49:30 +0000

[diff] [blame]

50

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

51

>>> roundtrip("# This is a comment\\n# This also")

52

True

Thomas Wouters

2006-12-13 04:49:30 +0000

[diff] [blame]

53

54

Some people use different formatting conventions, which makes

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

55

untokenize a little trickier. Note that this test involves trailing

56

whitespace after the colon. Note that we use hex escapes to make the

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

57

two trailing blanks apparent in the expected output.

Thomas Wouters

2006-12-13 04:49:30 +0000

[diff] [blame]

58

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

59

>>> roundtrip("if x == 1 : \\n"

60

... " print(x)\\n")

61

True

Thomas Wouters

2006-12-13 04:49:30 +0000

[diff] [blame]

62

Benjamin Peterson

ee8712c

2008-05-20 21:35:26 +0000

[diff] [blame]

63

>>> f = support.findfile("tokenize_tests.txt")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

64

>>> roundtrip(open(f, 'rb'))

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

65

True

Thomas Wouters

2006-12-13 04:49:30 +0000

[diff] [blame]

66

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

67

>>> roundtrip("if x == 1:\\n"

68

... " # A comment by itself.\\n"

69

... " print(x) # Comment here, too.\\n"

70

... " # Another comment.\\n"

71

... "after_if = True\\n")

72

True

Thomas Wouters

2006-12-13 04:49:30 +0000

[diff] [blame]

73

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

74

>>> roundtrip("if (x # The comments need to go in the right place\\n"

75

... " == 1):\\n"

76

... " print('x==1')\\n")

77

True

Thomas Wouters

2006-12-13 04:49:30 +0000

[diff] [blame]

78

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

79

>>> roundtrip("class Test: # A comment here\\n"

80

... " # A comment with weird indent\\n"

81

... " after_com = 5\\n"

82

... " def x(m): return m*5 # a one liner\\n"

83

... " def y(m): # A whitespace after the colon\\n"

84

... " return y*4 # 3-space indent\\n")

85

True

86

87

Some error-handling code

88

89

>>> roundtrip("try: import somemodule\\n"

90

... "except ImportError: # comment\\n"

Christian Heimes

2008-03-28 00:55:15 +0000

[diff] [blame]

91

... " print('Can not import' # comment2\\n)"

Neal Norwitz

752abd0

2008-05-13 04:55:24 +0000

[diff] [blame]

92

... "else: print('Loaded')\\n")

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

93

True

94

Eric Smith

2008-03-17 19:49:19 +0000

[diff] [blame]

95

Balancing continuation

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

96

97

>>> roundtrip("a = (3,4, \\n"

... "5,6)\\n"

... "y = [3, 4,\\n"

... "5]\\n"

... "z = {'a': 5,\\n"

102

... "'b':15, 'c':True}\\n"

103

... "x = len(y) + 5 - a[\\n"

104

... "3] - a[2]\\n"

105

... "+ len(z) - z[\\n"

... "'b']\\n")

True

Ordinary integers and binary operators

110

111

>>> dump_tokens("0xff <= 255")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

112

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

113

NUMBER '0xff' (1, 0) (1, 4)

114

OP '<=' (1, 5) (1, 7)

115

NUMBER '255' (1, 8) (1, 11)

Eric Smith

2008-03-17 19:49:19 +0000

[diff] [blame]

116

>>> dump_tokens("0b10 <= 255")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

117

ENCODING 'utf-8' (0, 0) (0, 0)

Eric Smith

2008-03-17 19:49:19 +0000

[diff] [blame]

118

NUMBER '0b10' (1, 0) (1, 4)

119

OP '<=' (1, 5) (1, 7)

120

NUMBER '255' (1, 8) (1, 11)

121

>>> dump_tokens("0o123 <= 0O123")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

122

ENCODING 'utf-8' (0, 0) (0, 0)

Eric Smith

2008-03-17 19:49:19 +0000

[diff] [blame]

123

NUMBER '0o123' (1, 0) (1, 5)

124

OP '<=' (1, 6) (1, 8)

125

NUMBER '0O123' (1, 9) (1, 14)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

126

>>> dump_tokens("1234567 > ~0x15")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

127

ENCODING 'utf-8' (0, 0) (0, 0)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

128

NUMBER '1234567' (1, 0) (1, 7)

129

OP '>' (1, 8) (1, 9)

130

OP '~' (1, 10) (1, 11)

131

NUMBER '0x15' (1, 11) (1, 15)

132

>>> dump_tokens("2134568 != 1231515")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

133

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

134

NUMBER '2134568' (1, 0) (1, 7)

135

OP '!=' (1, 8) (1, 10)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

136

NUMBER '1231515' (1, 11) (1, 18)

137

>>> dump_tokens("(-124561-1) & 200000000")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

138

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

139

OP '(' (1, 0) (1, 1)

140

OP '-' (1, 1) (1, 2)

141

NUMBER '124561' (1, 2) (1, 8)

142

OP '-' (1, 8) (1, 9)

143

NUMBER '1' (1, 9) (1, 10)

144

OP ')' (1, 10) (1, 11)

145

OP '&' (1, 12) (1, 13)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

146

NUMBER '200000000' (1, 14) (1, 23)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

147

>>> dump_tokens("0xdeadbeef != -1")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

148

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

149

NUMBER '0xdeadbeef' (1, 0) (1, 10)

150

OP '!=' (1, 11) (1, 13)

151

OP '-' (1, 14) (1, 15)

152

NUMBER '1' (1, 15) (1, 16)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

153

>>> dump_tokens("0xdeadc0de & 12345")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

154

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

155

NUMBER '0xdeadc0de' (1, 0) (1, 10)

156

OP '&' (1, 11) (1, 12)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

157

NUMBER '12345' (1, 13) (1, 18)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

158

>>> dump_tokens("0xFF & 0x15 | 1234")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

159

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

160

NUMBER '0xFF' (1, 0) (1, 4)

161

OP '&' (1, 5) (1, 6)

162

NUMBER '0x15' (1, 7) (1, 11)

163

OP '|' (1, 12) (1, 13)

164

NUMBER '1234' (1, 14) (1, 18)

Long integers

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

168

>>> dump_tokens("x = 0")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

169

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

170

NAME 'x' (1, 0) (1, 1)

171

OP '=' (1, 2) (1, 3)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

172

NUMBER '0' (1, 4) (1, 5)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

173

>>> dump_tokens("x = 0xfffffffffff")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

174

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

175

NAME 'x' (1, 0) (1, 1)

176

OP '=' (1, 2) (1, 3)

177

NUMBER '0xffffffffff (1, 4) (1, 17)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

178

>>> dump_tokens("x = 123141242151251616110")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

179

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

180

NAME 'x' (1, 0) (1, 1)

181

OP '=' (1, 2) (1, 3)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

182

NUMBER '123141242151 (1, 4) (1, 25)

183

>>> dump_tokens("x = -15921590215012591")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

184

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

185

NAME 'x' (1, 0) (1, 1)

186

OP '=' (1, 2) (1, 3)

187

OP '-' (1, 4) (1, 5)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

188

NUMBER '159215902150 (1, 5) (1, 22)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

189

190

Floating point numbers

191

192

>>> dump_tokens("x = 3.14159")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

193

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

194

NAME 'x' (1, 0) (1, 1)

195

OP '=' (1, 2) (1, 3)

196

NUMBER '3.14159' (1, 4) (1, 11)

197

>>> dump_tokens("x = 314159.")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

198

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

199

NAME 'x' (1, 0) (1, 1)

200

OP '=' (1, 2) (1, 3)

201

NUMBER '314159.' (1, 4) (1, 11)

202

>>> dump_tokens("x = .314159")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

203

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

204

NAME 'x' (1, 0) (1, 1)

205

OP '=' (1, 2) (1, 3)

206

NUMBER '.314159' (1, 4) (1, 11)

207

>>> dump_tokens("x = 3e14159")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

208

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

209

NAME 'x' (1, 0) (1, 1)

210

OP '=' (1, 2) (1, 3)

211

NUMBER '3e14159' (1, 4) (1, 11)

212

>>> dump_tokens("x = 3E123")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

213

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

214

NAME 'x' (1, 0) (1, 1)

215

OP '=' (1, 2) (1, 3)

216

NUMBER '3E123' (1, 4) (1, 9)

217

>>> dump_tokens("x+y = 3e-1230")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

218

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

219

NAME 'x' (1, 0) (1, 1)

220

OP '+' (1, 1) (1, 2)

221

NAME 'y' (1, 2) (1, 3)

222

OP '=' (1, 4) (1, 5)

223

NUMBER '3e-1230' (1, 6) (1, 13)

224

>>> dump_tokens("x = 3.14e159")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

225

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

226

NAME 'x' (1, 0) (1, 1)

227

OP '=' (1, 2) (1, 3)

228

NUMBER '3.14e159' (1, 4) (1, 12)

String literals

>>> dump_tokens("x = ''; y = \\\"\\\"")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

233

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

234

NAME 'x' (1, 0) (1, 1)

235

OP '=' (1, 2) (1, 3)

236

STRING "''" (1, 4) (1, 6)

237

OP ';' (1, 6) (1, 7)

238

NAME 'y' (1, 8) (1, 9)

239

OP '=' (1, 10) (1, 11)

240

STRING '""' (1, 12) (1, 14)

241

>>> dump_tokens("x = '\\\"'; y = \\\"'\\\"")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

242

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

243

NAME 'x' (1, 0) (1, 1)

244

OP '=' (1, 2) (1, 3)

245

STRING '\\'"\\'' (1, 4) (1, 7)

246

OP ';' (1, 7) (1, 8)

247

NAME 'y' (1, 9) (1, 10)

248

OP '=' (1, 11) (1, 12)

249

STRING '"\\'"' (1, 13) (1, 16)

250

>>> dump_tokens("x = \\\"doesn't \\\"shrink\\\", does it\\\"")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

251

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

252

NAME 'x' (1, 0) (1, 1)

253

OP '=' (1, 2) (1, 3)

254

STRING '"doesn\\'t "' (1, 4) (1, 14)

255

NAME 'shrink' (1, 14) (1, 20)

256

STRING '", does it"' (1, 20) (1, 31)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

257

>>> dump_tokens("x = 'abc' + 'ABC'")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

258

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

259

NAME 'x' (1, 0) (1, 1)

260

OP '=' (1, 2) (1, 3)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

261

STRING "'abc'" (1, 4) (1, 9)

262

OP '+' (1, 10) (1, 11)

263

STRING "'ABC'" (1, 12) (1, 17)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

264

>>> dump_tokens('y = "ABC" + "ABC"')

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

265

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

266

NAME 'y' (1, 0) (1, 1)

267

OP '=' (1, 2) (1, 3)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

268

STRING '"ABC"' (1, 4) (1, 9)

269

OP '+' (1, 10) (1, 11)

270

STRING '"ABC"' (1, 12) (1, 17)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

271

>>> dump_tokens("x = r'abc' + r'ABC' + R'ABC' + R'ABC'")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

272

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

273

NAME 'x' (1, 0) (1, 1)

274

OP '=' (1, 2) (1, 3)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

275

STRING "r'abc'" (1, 4) (1, 10)

276

OP '+' (1, 11) (1, 12)

277

STRING "r'ABC'" (1, 13) (1, 19)

278

OP '+' (1, 20) (1, 21)

279

STRING "R'ABC'" (1, 22) (1, 28)

280

OP '+' (1, 29) (1, 30)

281

STRING "R'ABC'" (1, 31) (1, 37)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

282

>>> dump_tokens('y = r"abc" + r"ABC" + R"ABC" + R"ABC"')

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

283

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

284

NAME 'y' (1, 0) (1, 1)

285

OP '=' (1, 2) (1, 3)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

286

STRING 'r"abc"' (1, 4) (1, 10)

287

OP '+' (1, 11) (1, 12)

288

STRING 'r"ABC"' (1, 13) (1, 19)

289

OP '+' (1, 20) (1, 21)

290

STRING 'R"ABC"' (1, 22) (1, 28)

291

OP '+' (1, 29) (1, 30)

292

STRING 'R"ABC"' (1, 31) (1, 37)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

293

Meador Inge

8d5c0b8

2012-06-16 21:49:08 -0500

[diff] [blame]

294

>>> dump_tokens("u'abc' + U'abc'")

295

ENCODING 'utf-8' (0, 0) (0, 0)

296

STRING "u'abc'" (1, 0) (1, 6)

297

OP '+' (1, 7) (1, 8)

298

STRING "U'abc'" (1, 9) (1, 15)

299

>>> dump_tokens('u"abc" + U"abc"')

300

ENCODING 'utf-8' (0, 0) (0, 0)

301

STRING 'u"abc"' (1, 0) (1, 6)

302

OP '+' (1, 7) (1, 8)

303

STRING 'U"abc"' (1, 9) (1, 15)

Meador Inge

8d5c0b8

2012-06-16 21:49:08 -0500

[diff] [blame]

304

305

>>> dump_tokens("b'abc' + B'abc'")

306

ENCODING 'utf-8' (0, 0) (0, 0)

307

STRING "b'abc'" (1, 0) (1, 6)

308

OP '+' (1, 7) (1, 8)

309

STRING "B'abc'" (1, 9) (1, 15)

310

>>> dump_tokens('b"abc" + B"abc"')

311

ENCODING 'utf-8' (0, 0) (0, 0)

312

STRING 'b"abc"' (1, 0) (1, 6)

313

OP '+' (1, 7) (1, 8)

314

STRING 'B"abc"' (1, 9) (1, 15)

315

>>> dump_tokens("br'abc' + bR'abc' + Br'abc' + BR'abc'")

316

ENCODING 'utf-8' (0, 0) (0, 0)

317

STRING "br'abc'" (1, 0) (1, 7)

318

OP '+' (1, 8) (1, 9)

319

STRING "bR'abc'" (1, 10) (1, 17)

320

OP '+' (1, 18) (1, 19)

321

STRING "Br'abc'" (1, 20) (1, 27)

322

OP '+' (1, 28) (1, 29)

323

STRING "BR'abc'" (1, 30) (1, 37)

324

>>> dump_tokens('br"abc" + bR"abc" + Br"abc" + BR"abc"')

325

ENCODING 'utf-8' (0, 0) (0, 0)

326

STRING 'br"abc"' (1, 0) (1, 7)

327

OP '+' (1, 8) (1, 9)

328

STRING 'bR"abc"' (1, 10) (1, 17)

329

OP '+' (1, 18) (1, 19)

330

STRING 'Br"abc"' (1, 20) (1, 27)

331

OP '+' (1, 28) (1, 29)

332

STRING 'BR"abc"' (1, 30) (1, 37)

333

>>> dump_tokens("rb'abc' + rB'abc' + Rb'abc' + RB'abc'")

334

ENCODING 'utf-8' (0, 0) (0, 0)

335

STRING "rb'abc'" (1, 0) (1, 7)

336

OP '+' (1, 8) (1, 9)

337

STRING "rB'abc'" (1, 10) (1, 17)

338

OP '+' (1, 18) (1, 19)

339

STRING "Rb'abc'" (1, 20) (1, 27)

340

OP '+' (1, 28) (1, 29)

341

STRING "RB'abc'" (1, 30) (1, 37)

342

>>> dump_tokens('rb"abc" + rB"abc" + Rb"abc" + RB"abc"')

343

ENCODING 'utf-8' (0, 0) (0, 0)

344

STRING 'rb"abc"' (1, 0) (1, 7)

345

OP '+' (1, 8) (1, 9)

346

STRING 'rB"abc"' (1, 10) (1, 17)

347

OP '+' (1, 18) (1, 19)

348

STRING 'Rb"abc"' (1, 20) (1, 27)

349

OP '+' (1, 28) (1, 29)

350

STRING 'RB"abc"' (1, 30) (1, 37)

351

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

352

Operators

353

354

>>> dump_tokens("def d22(a, b, c=2, d=2, *k): pass")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

355

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

356

NAME 'def' (1, 0) (1, 3)

357

NAME 'd22' (1, 4) (1, 7)

358

OP '(' (1, 7) (1, 8)

359

NAME 'a' (1, 8) (1, 9)

360

OP ',' (1, 9) (1, 10)

361

NAME 'b' (1, 11) (1, 12)

362

OP ',' (1, 12) (1, 13)

363

NAME 'c' (1, 14) (1, 15)

364

OP '=' (1, 15) (1, 16)

365

NUMBER '2' (1, 16) (1, 17)

366

OP ',' (1, 17) (1, 18)

367

NAME 'd' (1, 19) (1, 20)

368

OP '=' (1, 20) (1, 21)

369

NUMBER '2' (1, 21) (1, 22)

370

OP ',' (1, 22) (1, 23)

371

OP '*' (1, 24) (1, 25)

372

NAME 'k' (1, 25) (1, 26)

373

OP ')' (1, 26) (1, 27)

374

OP ':' (1, 27) (1, 28)

375

NAME 'pass' (1, 29) (1, 33)

376

>>> dump_tokens("def d01v_(a=1, *k, **w): pass")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

377

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

378

NAME 'def' (1, 0) (1, 3)

379

NAME 'd01v_' (1, 4) (1, 9)

380

OP '(' (1, 9) (1, 10)

381

NAME 'a' (1, 10) (1, 11)

382

OP '=' (1, 11) (1, 12)

383

NUMBER '1' (1, 12) (1, 13)

384

OP ',' (1, 13) (1, 14)

385

OP '*' (1, 15) (1, 16)

386

NAME 'k' (1, 16) (1, 17)

387

OP ',' (1, 17) (1, 18)

388

OP '**' (1, 19) (1, 21)

389

NAME 'w' (1, 21) (1, 22)

390

OP ')' (1, 22) (1, 23)

391

OP ':' (1, 23) (1, 24)

392

NAME 'pass' (1, 25) (1, 29)

Comparison

>>> dump_tokens("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != " +

397

... "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

398

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

399

NAME 'if' (1, 0) (1, 2)

400

NUMBER '1' (1, 3) (1, 4)

401

OP '<' (1, 5) (1, 6)

402

NUMBER '1' (1, 7) (1, 8)

403

OP '>' (1, 9) (1, 10)

404

NUMBER '1' (1, 11) (1, 12)

405

OP '==' (1, 13) (1, 15)

406

NUMBER '1' (1, 16) (1, 17)

407

OP '>=' (1, 18) (1, 20)

408

NUMBER '5' (1, 21) (1, 22)

409

OP '<=' (1, 23) (1, 25)

410

NUMBER '0x15' (1, 26) (1, 30)

411

OP '<=' (1, 31) (1, 33)

412

NUMBER '0x12' (1, 34) (1, 38)

413

OP '!=' (1, 39) (1, 41)

414

NUMBER '1' (1, 42) (1, 43)

415

NAME 'and' (1, 44) (1, 47)

416

NUMBER '5' (1, 48) (1, 49)

417

NAME 'in' (1, 50) (1, 52)

418

NUMBER '1' (1, 53) (1, 54)

419

NAME 'not' (1, 55) (1, 58)

420

NAME 'in' (1, 59) (1, 61)

421

NUMBER '1' (1, 62) (1, 63)

422

NAME 'is' (1, 64) (1, 66)

423

NUMBER '1' (1, 67) (1, 68)

424

NAME 'or' (1, 69) (1, 71)

425

NUMBER '5' (1, 72) (1, 73)

426

NAME 'is' (1, 74) (1, 76)

427

NAME 'not' (1, 77) (1, 80)

428

NUMBER '1' (1, 81) (1, 82)

429

OP ':' (1, 82) (1, 83)

430

NAME 'pass' (1, 84) (1, 88)

Shift

>>> dump_tokens("x = 1 << 1 >> 5")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

435

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

436

NAME 'x' (1, 0) (1, 1)

437

OP '=' (1, 2) (1, 3)

438

NUMBER '1' (1, 4) (1, 5)

439

OP '<<' (1, 6) (1, 8)

440

NUMBER '1' (1, 9) (1, 10)

441

OP '>>' (1, 11) (1, 13)

442

NUMBER '5' (1, 14) (1, 15)

Additive

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

446

>>> dump_tokens("x = 1 - y + 15 - 1 + 0x124 + z + a[5]")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

447

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

448

NAME 'x' (1, 0) (1, 1)

449

OP '=' (1, 2) (1, 3)

450

NUMBER '1' (1, 4) (1, 5)

451

OP '-' (1, 6) (1, 7)

452

NAME 'y' (1, 8) (1, 9)

453

OP '+' (1, 10) (1, 11)

454

NUMBER '15' (1, 12) (1, 14)

455

OP '-' (1, 15) (1, 16)

Mark Dickinson

2008-03-16 05:05:12 +0000

[diff] [blame]

456

NUMBER '1' (1, 17) (1, 18)

457

OP '+' (1, 19) (1, 20)

458

NUMBER '0x124' (1, 21) (1, 26)

459

OP '+' (1, 27) (1, 28)

460

NAME 'z' (1, 29) (1, 30)

461

OP '+' (1, 31) (1, 32)

462

NAME 'a' (1, 33) (1, 34)

463

OP '[' (1, 34) (1, 35)

464

NUMBER '5' (1, 35) (1, 36)

465

OP ']' (1, 36) (1, 37)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

Multiplicative

>>> dump_tokens("x = 1//1*1/5*12%0x12")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

470

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

471

NAME 'x' (1, 0) (1, 1)

472

OP '=' (1, 2) (1, 3)

473

NUMBER '1' (1, 4) (1, 5)

474

OP '//' (1, 5) (1, 7)

475

NUMBER '1' (1, 7) (1, 8)

476

OP '*' (1, 8) (1, 9)

477

NUMBER '1' (1, 9) (1, 10)

478

OP '/' (1, 10) (1, 11)

479

NUMBER '5' (1, 11) (1, 12)

480

OP '*' (1, 12) (1, 13)

481

NUMBER '12' (1, 13) (1, 15)

482

OP '%' (1, 15) (1, 16)

483

NUMBER '0x12' (1, 16) (1, 20)

Unary

>>> dump_tokens("~1 ^ 1 & 1 |1 ^ -1")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

488

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

489

OP '~' (1, 0) (1, 1)

490

NUMBER '1' (1, 1) (1, 2)

491

OP '^' (1, 3) (1, 4)

492

NUMBER '1' (1, 5) (1, 6)

493

OP '&' (1, 7) (1, 8)

494

NUMBER '1' (1, 9) (1, 10)

495

OP '|' (1, 11) (1, 12)

496

NUMBER '1' (1, 12) (1, 13)

497

OP '^' (1, 14) (1, 15)

498

OP '-' (1, 16) (1, 17)

499

NUMBER '1' (1, 17) (1, 18)

500

>>> dump_tokens("-1*1/1+1*1//1 - ---1**1")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

501

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

502

OP '-' (1, 0) (1, 1)

503

NUMBER '1' (1, 1) (1, 2)

504

OP '*' (1, 2) (1, 3)

505

NUMBER '1' (1, 3) (1, 4)

506

OP '/' (1, 4) (1, 5)

507

NUMBER '1' (1, 5) (1, 6)

508

OP '+' (1, 6) (1, 7)

509

NUMBER '1' (1, 7) (1, 8)

510

OP '*' (1, 8) (1, 9)

511

NUMBER '1' (1, 9) (1, 10)

512

OP '//' (1, 10) (1, 12)

513

NUMBER '1' (1, 12) (1, 13)

514

OP '-' (1, 14) (1, 15)

515

OP '-' (1, 16) (1, 17)

516

OP '-' (1, 17) (1, 18)

517

OP '-' (1, 18) (1, 19)

518

NUMBER '1' (1, 19) (1, 20)

519

OP '**' (1, 20) (1, 22)

520

NUMBER '1' (1, 22) (1, 23)

Selector

>>> dump_tokens("import sys, time\\nx = sys.modules['time'].time()")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

525

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

526

NAME 'import' (1, 0) (1, 6)

527

NAME 'sys' (1, 7) (1, 10)

528

OP ',' (1, 10) (1, 11)

529

NAME 'time' (1, 12) (1, 16)

530

NEWLINE '\\n' (1, 16) (1, 17)

531

NAME 'x' (2, 0) (2, 1)

532

OP '=' (2, 2) (2, 3)

533

NAME 'sys' (2, 4) (2, 7)

534

OP '.' (2, 7) (2, 8)

535

NAME 'modules' (2, 8) (2, 15)

536

OP '[' (2, 15) (2, 16)

537

STRING "'time'" (2, 16) (2, 22)

538

OP ']' (2, 22) (2, 23)

539

OP '.' (2, 23) (2, 24)

540

NAME 'time' (2, 24) (2, 28)

541

OP '(' (2, 28) (2, 29)

542

OP ')' (2, 29) (2, 30)

Methods

>>> dump_tokens("@staticmethod\\ndef foo(x,y): pass")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

547

ENCODING 'utf-8' (0, 0) (0, 0)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

548

OP '@' (1, 0) (1, 1)

549

NAME 'staticmethod (1, 1) (1, 13)

550

NEWLINE '\\n' (1, 13) (1, 14)

551

NAME 'def' (2, 0) (2, 3)

552

NAME 'foo' (2, 4) (2, 7)

553

OP '(' (2, 7) (2, 8)

554

NAME 'x' (2, 8) (2, 9)

555

OP ',' (2, 9) (2, 10)

556

NAME 'y' (2, 10) (2, 11)

557

OP ')' (2, 11) (2, 12)

558

OP ':' (2, 12) (2, 13)

559

NAME 'pass' (2, 14) (2, 18)

560

561

Backslash means line continuation, except for comments

562

563

>>> roundtrip("x=1+\\\\n"

564

... "1\\n"

565

... "# This is a comment\\\\n"

566

... "# This also\\n")

567

True

568

>>> roundtrip("# Comment \\\\nx = 0")

569

True

Christian Heimes

2008-03-28 00:55:15 +0000

[diff] [blame]

570

571

Two string literals on the same line

572

573

>>> roundtrip("'' ''")

574

True

575

576

Test roundtrip on random python modules.

Antoine Pitrou

5bc4fa7

2010-10-14 15:34:31 +0000

[diff] [blame]

577

pass the '-ucpu' option to process the full directory.

Christian Heimes

2008-03-28 00:55:15 +0000

[diff] [blame]

578

579

>>> import random

580

>>> tempdir = os.path.dirname(f) or os.curdir

581

>>> testfiles = glob.glob(os.path.join(tempdir, "test*.py"))

582

Terry Jan Reedy

2014-02-23 18:00:31 -0500

[diff] [blame]

583

Tokenize is broken on test_pep3131.py because regular expressions are

584

broken on the obscure unicode identifiers in it. *sigh*

585

With roundtrip extended to test the 5-tuple mode of untokenize,

586

7 more testfiles fail. Remove them also until the failure is diagnosed.

587

Benjamin Peterson

963e402

2011-08-13 00:33:21 -0500

[diff] [blame]

588

>>> testfiles.remove(os.path.join(tempdir, "test_pep3131.py"))

Terry Jan Reedy

2014-02-23 18:00:31 -0500

[diff] [blame]

589

>>> for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'):

590

... testfiles.remove(os.path.join(tempdir, "test_%s.py") % f)

591

...

Antoine Pitrou

5bc4fa7

2010-10-14 15:34:31 +0000

[diff] [blame]

592

>>> if not support.is_resource_enabled("cpu"):

Christian Heimes

2008-03-28 00:55:15 +0000

[diff] [blame]

593

... testfiles = random.sample(testfiles, 10)

594

...

595

>>> for testfile in testfiles:

596

... if not roundtrip(open(testfile, 'rb')):

597

... print("Roundtrip failed for file %s" % testfile)

598

... break

599

... else: True

600

True

Benjamin Peterson

a0dfa82

2009-11-13 02:25:08 +0000

[diff] [blame]

601

602

Evil tabs

Benjamin Peterson

33856de

2010-08-30 14:41:20 +0000

[diff] [blame]

603

Benjamin Peterson

a0dfa82

2009-11-13 02:25:08 +0000

[diff] [blame]

604

>>> dump_tokens("def f():\\n\\tif x\\n \\tpass")

605

ENCODING 'utf-8' (0, 0) (0, 0)

606

NAME 'def' (1, 0) (1, 3)

607

NAME 'f' (1, 4) (1, 5)

OP '(' (1, 5) (1, 6)

OP ')' (1, 6) (1, 7)

OP ':' (1, 7) (1, 8)

NEWLINE '\\n' (1, 8) (1, 9)

612

INDENT '\\t' (2, 0) (2, 1)

613

NAME 'if' (2, 1) (2, 3)

614

NAME 'x' (2, 4) (2, 5)

615

NEWLINE '\\n' (2, 5) (2, 6)

616

INDENT ' \\t' (3, 0) (3, 9)

617

NAME 'pass' (3, 9) (3, 13)

618

DEDENT '' (4, 0) (4, 0)

619

DEDENT '' (4, 0) (4, 0)

Benjamin Peterson

33856de

2010-08-30 14:41:20 +0000

[diff] [blame]

620

621

Non-ascii identifiers

622

623

>>> dump_tokens("Örter = 'places'\\ngrün = 'green'")

624

ENCODING 'utf-8' (0, 0) (0, 0)

625

NAME 'Örter' (1, 0) (1, 5)

626

OP '=' (1, 6) (1, 7)

627

STRING "'places'" (1, 8) (1, 16)

628

NEWLINE '\\n' (1, 16) (1, 17)

629

NAME 'grün' (2, 0) (2, 4)

630

OP '=' (2, 5) (2, 6)

631

STRING "'green'" (2, 7) (2, 14)

Armin Ronacher

c0eaeca

2012-03-04 13:07:57 +0000

[diff] [blame]

632

633

Legacy unicode literals:

634

Christian Heimes

0b3847d

2012-06-20 11:17:58 +0200

[diff] [blame]

635

>>> dump_tokens("Örter = u'places'\\ngrün = U'green'")

Armin Ronacher

c0eaeca

2012-03-04 13:07:57 +0000

[diff] [blame]

636

ENCODING 'utf-8' (0, 0) (0, 0)

637

NAME 'Örter' (1, 0) (1, 5)

638

OP '=' (1, 6) (1, 7)

639

STRING "u'places'" (1, 8) (1, 17)

640

NEWLINE '\\n' (1, 17) (1, 18)

641

NAME 'grün' (2, 0) (2, 4)

642

OP '=' (2, 5) (2, 6)

Christian Heimes

0b3847d

2012-06-20 11:17:58 +0200

[diff] [blame]

643

STRING "U'green'" (2, 7) (2, 15)

Thomas Wouters

2006-12-13 04:49:30 +0000

[diff] [blame]

644

"""

645

Benjamin Peterson

ee8712c

2008-05-20 21:35:26 +0000

[diff] [blame]

646

from test import support

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

647

from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,

Meador Inge

00c7f85

2012-01-19 00:44:45 -0600

[diff] [blame]

648

STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,

Terry Jan Reedy

2014-02-17 16:45:48 -0500

[diff] [blame]

649

open as tokenize_open, Untokenizer)

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

650

from io import BytesIO

Victor Stinner

387729e

2015-05-26 00:43:58 +0200

[diff] [blame]

651

from unittest import TestCase, mock

Jason R. Coombs

7cf3638

2015-06-20 19:13:50 -0400

[diff] [blame]

652

import os

Meador Inge

00c7f85

2012-01-19 00:44:45 -0600

[diff] [blame]

653

import token

Raymond Hettinger

2005-06-10 11:05:19 +0000

[diff] [blame]

654

Thomas Wouters

2006-12-13 04:49:30 +0000

[diff] [blame]

655

def dump_tokens(s):

656

"""Print out the tokens in s in a table format.

657

658

The ENDMARKER is omitted.

659

"""

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

660

f = BytesIO(s.encode('utf-8'))

661

for type, token, start, end, line in tokenize(f.readline):

Thomas Wouters

2006-12-13 04:49:30 +0000

[diff] [blame]

662

if type == ENDMARKER:

663

break

664

type = tok_name[type]

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

665

print("%(type)-10.10s %(token)-13.13r %(start)s %(end)s" % locals())

Thomas Wouters

2006-12-13 04:49:30 +0000

[diff] [blame]

666

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

667

def roundtrip(f):

668

"""

669

Test roundtrip for `untokenize`. `f` is an open file or a string.

Terry Jan Reedy

2014-02-23 18:00:31 -0500

[diff] [blame]

670

The source code in f is tokenized to both 5- and 2-tuples.

671

Both sequences are converted back to source code via

672

tokenize.untokenize(), and the latter tokenized again to 2-tuples.

673

The test fails if the 3 pair tokenizations do not match.

674

675

When untokenize bugs are fixed, untokenize with 5-tuples should

676

reproduce code that does not contain a backslash continuation

677

following spaces. A proper test should test this.

678

679

This function would be more useful for correcting bugs if it reported

680

the first point of failure, like assertEqual, rather than just

681

returning False -- or if it were only used in unittests and not

682

doctest and actually used assertEqual.

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

683

"""

Terry Jan Reedy

2014-02-23 18:00:31 -0500

[diff] [blame]

684

# Get source code and original tokenizations

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

685

if isinstance(f, str):

Terry Jan Reedy

2014-02-23 18:00:31 -0500

[diff] [blame]

686

code = f.encode('utf-8')

687

else:

688

code = f.read()

Brian Curtin

9f5f65c

2010-10-30 21:35:28 +0000

[diff] [blame]

689

f.close()

Terry Jan Reedy

2014-02-23 18:00:31 -0500

[diff] [blame]

690

readline = iter(code.splitlines(keepends=True)).__next__

691

tokens5 = list(tokenize(readline))

692

tokens2 = [tok[:2] for tok in tokens5]

693

# Reproduce tokens2 from pairs

694

bytes_from2 = untokenize(tokens2)

695

readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__

696

tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]

697

# Reproduce tokens2 from 5-tuples

698

bytes_from5 = untokenize(tokens5)

699

readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__

700

tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]

701

# Compare 3 versions

702

return tokens2 == tokens2_from2 == tokens2_from5

Thomas Wouters

2006-12-13 04:49:30 +0000

[diff] [blame]

703

Thomas Wouters

2006-04-21 10:40:58 +0000

[diff] [blame]

704

# This is an example from the docs, set up as a doctest.

Raymond Hettinger

2005-06-10 11:05:19 +0000

[diff] [blame]

705

def decistmt(s):

706

"""Substitute Decimals for floats in a string of statements.

707

708

>>> from decimal import Decimal

Georg Brandl

88fc664

2007-02-09 21:28:07 +0000

[diff] [blame]

709

>>> s = 'print(+21.3e-5*-.1234/81.7)'

Raymond Hettinger

2005-06-10 11:05:19 +0000

[diff] [blame]

710

>>> decistmt(s)

Georg Brandl

88fc664

2007-02-09 21:28:07 +0000

[diff] [blame]

711

"print (+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7'))"

Raymond Hettinger

2005-06-10 11:05:19 +0000

[diff] [blame]

712

Thomas Wouters

2006-04-21 10:40:58 +0000

[diff] [blame]

713

The format of the exponent is inherited from the platform C library.

714

Known cases are "e-007" (Windows) and "e-07" (not Windows). Since

Mark Dickinson

388122d

2010-08-04 20:56:28 +0000

[diff] [blame]

715

we're only showing 11 digits, and the 12th isn't close to 5, the

Thomas Wouters

2006-04-21 10:40:58 +0000

[diff] [blame]

716

rest of the output should be platform-independent.

717

718

>>> exec(s) #doctest: +ELLIPSIS

Mark Dickinson

388122d

2010-08-04 20:56:28 +0000

[diff] [blame]

719

-3.2171603427...e-0...7

Thomas Wouters

2006-04-21 10:40:58 +0000

[diff] [blame]

720

721

Output from calculations with Decimal should be identical across all

722

platforms.

723

Raymond Hettinger

2005-06-10 11:05:19 +0000

[diff] [blame]

724

>>> exec(decistmt(s))

725

-3.217160342717258261933904529E-7

Raymond Hettinger

2005-06-10 11:05:19 +0000

[diff] [blame]

726

"""

727

result = []

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

728

g = tokenize(BytesIO(s.encode('utf-8')).readline) # tokenize the string

Raymond Hettinger

2005-06-10 11:05:19 +0000

[diff] [blame]

729

for toknum, tokval, _, _, _ in g:

730

if toknum == NUMBER and '.' in tokval: # replace NUMBER tokens

result.extend([

(NAME, 'Decimal'),

(OP, '('),

(STRING, repr(tokval)),

(OP, ')')

])

else:

result.append((toknum, tokval))

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

739

return untokenize(result).decode('utf-8')

740

741

742

class TestTokenizerAdheresToPep0263(TestCase):

743

"""

744

Test that tokenizer adheres to the coding behaviour stipulated in PEP 0263.

745

"""

746

747

def _testFile(self, filename):

748

path = os.path.join(os.path.dirname(__file__), filename)

749

return roundtrip(open(path, 'rb'))

750

751

def test_utf8_coding_cookie_and_no_utf8_bom(self):

Ned Deily

2ea6fcc

2011-07-19 16:15:27 -0700

[diff] [blame]

752

f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt'

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

753

self.assertTrue(self._testFile(f))

754

755

def test_latin1_coding_cookie_and_utf8_bom(self):

756

"""

757

As per PEP 0263, if a file starts with a utf-8 BOM signature, the only

758

allowed encoding for the comment is 'utf-8'. The text file used in

759

this test starts with a BOM signature, but specifies latin1 as the

760

coding, so verify that a SyntaxError is raised, which matches the

761

behaviour of the interpreter when it encounters a similar condition.

762

"""

763

f = 'tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt'

Benjamin Peterson

c9c0f20

2009-06-30 23:06:06 +0000

[diff] [blame]

764

self.assertRaises(SyntaxError, self._testFile, f)

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

765

766

def test_no_coding_cookie_and_utf8_bom(self):

767

f = 'tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt'

768

self.assertTrue(self._testFile(f))

769

770

def test_utf8_coding_cookie_and_utf8_bom(self):

771

f = 'tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt'

772

self.assertTrue(self._testFile(f))

773

Florent Xicluna

11f0b41

2012-07-07 12:13:35 +0200

[diff] [blame]

774

def test_bad_coding_cookie(self):

775

self.assertRaises(SyntaxError, self._testFile, 'bad_coding.py')

776

self.assertRaises(SyntaxError, self._testFile, 'bad_coding2.py')

777

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

778

779

class Test_Tokenize(TestCase):

780

781

def test__tokenize_decodes_with_specified_encoding(self):

782

literal = '"ЉЊЈЁЂ"'

783

line = literal.encode('utf-8')

first = False

def readline():

nonlocal first

if not first:

first = True

return line

else:

return b''

# skip the initial encoding token and the end token

794

tokens = list(_tokenize(readline, encoding='utf-8'))[1:-1]

795

expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

796

self.assertEqual(tokens, expected_tokens,

797

"bytes not decoded with encoding")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

798

799

def test__tokenize_does_not_decode_with_encoding_none(self):

literal = '"ЉЊЈЁЂ"'

first = False

def readline():

nonlocal first

if not first:

first = True

return literal

else:

return b''

# skip the end token

tokens = list(_tokenize(readline, encoding=None))[:-1]

812

expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

813

self.assertEqual(tokens, expected_tokens,

814

"string not tokenized when encoding is None")

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

815

816

817

class TestDetectEncoding(TestCase):

818

819

def get_readline(self, lines):

index = 0

def readline():

nonlocal index

if index == len(lines):

raise StopIteration

line = lines[index]

index += 1

return line

return readline

def test_no_bom_no_encoding_cookie(self):

831

lines = (

832

b'# something\n',

833

b'print(something)\n',

834

b'do_something(else)\n'

835

)

836

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

837

self.assertEqual(encoding, 'utf-8')

838

self.assertEqual(consumed_lines, list(lines[:2]))

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

839

840

def test_bom_no_cookie(self):

841

lines = (

842

b'\xef\xbb\xbf# something\n',

843

b'print(something)\n',

844

b'do_something(else)\n'

845

)

846

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

847

self.assertEqual(encoding, 'utf-8-sig')

848

self.assertEqual(consumed_lines,

849

[b'# something\n', b'print(something)\n'])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

850

851

def test_cookie_first_line_no_bom(self):

852

lines = (

853

b'# -*- coding: latin-1 -*-\n',

854

b'print(something)\n',

855

b'do_something(else)\n'

856

)

857

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

858

self.assertEqual(encoding, 'iso-8859-1')

859

self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n'])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

860

861

def test_matched_bom_and_cookie_first_line(self):

862

lines = (

863

b'\xef\xbb\xbf# coding=utf-8\n',

864

b'print(something)\n',

865

b'do_something(else)\n'

866

)

867

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

868

self.assertEqual(encoding, 'utf-8-sig')

869

self.assertEqual(consumed_lines, [b'# coding=utf-8\n'])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

870

871

def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self):

872

lines = (

873

b'\xef\xbb\xbf# vim: set fileencoding=ascii :\n',

874

b'print(something)\n',

875

b'do_something(else)\n'

876

)

877

readline = self.get_readline(lines)

878

self.assertRaises(SyntaxError, detect_encoding, readline)

879

880

def test_cookie_second_line_no_bom(self):

881

lines = (

882

b'#! something\n',

883

b'# vim: set fileencoding=ascii :\n',

884

b'print(something)\n',

885

b'do_something(else)\n'

886

)

887

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

888

self.assertEqual(encoding, 'ascii')

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

889

expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n']

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

890

self.assertEqual(consumed_lines, expected)

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

891

892

def test_matched_bom_and_cookie_second_line(self):

893

lines = (

894

b'\xef\xbb\xbf#! something\n',

895

b'f# coding=utf-8\n',

896

b'print(something)\n',

897

b'do_something(else)\n'

898

)

899

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

900

self.assertEqual(encoding, 'utf-8-sig')

901

self.assertEqual(consumed_lines,

902

[b'#! something\n', b'f# coding=utf-8\n'])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

903

904

def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self):

905

lines = (

906

b'\xef\xbb\xbf#! something\n',

907

b'# vim: set fileencoding=ascii :\n',

908

b'print(something)\n',

909

b'do_something(else)\n'

910

)

911

readline = self.get_readline(lines)

912

self.assertRaises(SyntaxError, detect_encoding, readline)

913

Serhiy Storchaka

768c16c

2014-01-09 18:36:09 +0200

[diff] [blame]

914

def test_cookie_second_line_noncommented_first_line(self):

915

lines = (

916

b"print('\xc2\xa3')\n",

917

b'# vim: set fileencoding=iso8859-15 :\n',

918

b"print('\xe2\x82\xac')\n"

919

)

920

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

921

self.assertEqual(encoding, 'utf-8')

922

expected = [b"print('\xc2\xa3')\n"]

923

self.assertEqual(consumed_lines, expected)

924

925

def test_cookie_second_line_commented_first_line(self):

926

lines = (

927

b"#print('\xc2\xa3')\n",

928

b'# vim: set fileencoding=iso8859-15 :\n',

929

b"print('\xe2\x82\xac')\n"

930

)

931

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

932

self.assertEqual(encoding, 'iso8859-15')

933

expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n']

934

self.assertEqual(consumed_lines, expected)

935

936

def test_cookie_second_line_empty_first_line(self):

937

lines = (

938

b'\n',

939

b'# vim: set fileencoding=iso8859-15 :\n',

940

b"print('\xe2\x82\xac')\n"

941

)

942

encoding, consumed_lines = detect_encoding(self.get_readline(lines))

943

self.assertEqual(encoding, 'iso8859-15')

944

expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']

945

self.assertEqual(consumed_lines, expected)

946

Benjamin Peterson

2009-10-09 21:43:09 +0000

[diff] [blame]

947

def test_latin1_normalization(self):

948

# See get_normal_name() in tokenizer.c.

949

encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",

950

"iso-8859-1-unix", "iso-latin-1-mac")

951

for encoding in encodings:

952

for rep in ("-", "_"):

953

enc = encoding.replace("-", rep)

954

lines = (b"#!/usr/bin/python\n",

955

b"# coding: " + enc.encode("ascii") + b"\n",

956

b"print(things)\n",

957

b"do_something += 4\n")

958

rl = self.get_readline(lines)

959

found, consumed_lines = detect_encoding(rl)

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

960

self.assertEqual(found, "iso-8859-1")

Benjamin Peterson

2009-10-09 21:43:09 +0000

[diff] [blame]

961

Martin v. Löwis

63674f4

2012-04-20 14:36:47 +0200

[diff] [blame]

962

def test_syntaxerror_latin1(self):

963

# Issue 14629: need to raise SyntaxError if the first

964

# line(s) have non-UTF-8 characters

965

lines = (

966

b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S

967

)

968

readline = self.get_readline(lines)

969

self.assertRaises(SyntaxError, detect_encoding, readline)

970

971

Benjamin Peterson

2009-10-09 21:43:09 +0000

[diff] [blame]

972

def test_utf8_normalization(self):

973

# See get_normal_name() in tokenizer.c.

974

encodings = ("utf-8", "utf-8-mac", "utf-8-unix")

975

for encoding in encodings:

976

for rep in ("-", "_"):

977

enc = encoding.replace("-", rep)

978

lines = (b"#!/usr/bin/python\n",

979

b"# coding: " + enc.encode("ascii") + b"\n",

980

b"1 + 3\n")

981

rl = self.get_readline(lines)

982

found, consumed_lines = detect_encoding(rl)

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

983

self.assertEqual(found, "utf-8")

Benjamin Peterson

2009-10-09 21:43:09 +0000

[diff] [blame]

984

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

985

def test_short_files(self):

986

readline = self.get_readline((b'print(something)\n',))

987

encoding, consumed_lines = detect_encoding(readline)

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

988

self.assertEqual(encoding, 'utf-8')

989

self.assertEqual(consumed_lines, [b'print(something)\n'])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

990

991

encoding, consumed_lines = detect_encoding(self.get_readline(()))

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

992

self.assertEqual(encoding, 'utf-8')

993

self.assertEqual(consumed_lines, [])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

994

995

readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',))

996

encoding, consumed_lines = detect_encoding(readline)

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

997

self.assertEqual(encoding, 'utf-8-sig')

998

self.assertEqual(consumed_lines, [b'print(something)\n'])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

999

1000

readline = self.get_readline((b'\xef\xbb\xbf',))

1001

encoding, consumed_lines = detect_encoding(readline)

Ezio Melotti

2010-11-20 19:04:17 +0000

[diff] [blame]

1002

self.assertEqual(encoding, 'utf-8-sig')

1003

self.assertEqual(consumed_lines, [])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1004

Benjamin Peterson

433f32c

2008-12-12 01:25:05 +0000

[diff] [blame]

1005

readline = self.get_readline((b'# coding: bad\n',))

1006

self.assertRaises(SyntaxError, detect_encoding, readline)

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1007

Serhiy Storchaka

dafea85

2013-09-16 23:51:56 +0300

[diff] [blame]

1008

def test_false_encoding(self):

1009

# Issue 18873: "Encoding" detected in non-comment lines

1010

readline = self.get_readline((b'print("#coding=fake")',))

1011

encoding, consumed_lines = detect_encoding(readline)

1012

self.assertEqual(encoding, 'utf-8')

1013

self.assertEqual(consumed_lines, [b'print("#coding=fake")'])

1014

Victor Stinner

58c0752

2010-11-09 01:08:59 +0000

[diff] [blame]

1015

def test_open(self):

1016

filename = support.TESTFN + '.py'

1017

self.addCleanup(support.unlink, filename)

1018

1019

# test coding cookie

1020

for encoding in ('iso-8859-15', 'utf-8'):

1021

with open(filename, 'w', encoding=encoding) as fp:

1022

print("# coding: %s" % encoding, file=fp)

1023

print("print('euro:\u20ac')", file=fp)

1024

with tokenize_open(filename) as fp:

Victor Stinner

92665ab

2010-11-09 01:11:31 +0000

[diff] [blame]

1025

self.assertEqual(fp.encoding, encoding)

1026

self.assertEqual(fp.mode, 'r')

Victor Stinner

58c0752

2010-11-09 01:08:59 +0000

[diff] [blame]

1027

1028

# test BOM (no coding cookie)

1029

with open(filename, 'w', encoding='utf-8-sig') as fp:

1030

print("print('euro:\u20ac')", file=fp)

1031

with tokenize_open(filename) as fp:

Victor Stinner

92665ab

2010-11-09 01:11:31 +0000

[diff] [blame]

1032

self.assertEqual(fp.encoding, 'utf-8-sig')

1033

self.assertEqual(fp.mode, 'r')

Victor Stinner

58c0752

2010-11-09 01:08:59 +0000

[diff] [blame]

1034

Brett Cannon

c33f3f2

2012-04-20 13:23:54 -0400

[diff] [blame]

1035

def test_filename_in_exception(self):

1036

# When possible, include the file name in the exception.

1037

path = 'some_file_path'

1038

lines = (

1039

b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S

1040

)

1041

class Bunk:

1042

def __init__(self, lines, path):

self.name = path

self._lines = lines

self._index = 0

def readline(self):

if self._index == len(lines):

1049

raise StopIteration

1050

line = lines[self._index]

self._index += 1

return line

with self.assertRaises(SyntaxError):

1055

ins = Bunk(lines, path)

1056

# Make sure lacking a name isn't an issue.

1057

del ins.name

1058

detect_encoding(ins.readline)

1059

with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):

1060

ins = Bunk(lines, path)

1061

detect_encoding(ins.readline)

1062

Victor Stinner

387729e

2015-05-26 00:43:58 +0200

[diff] [blame]

1063

def test_open_error(self):

1064

# Issue #23840: open() must close the binary file on error

1065

m = BytesIO(b'#coding:xxx')

1066

with mock.patch('tokenize._builtin_open', return_value=m):

1067

self.assertRaises(SyntaxError, tokenize_open, 'foobar')

1068

self.assertTrue(m.closed)

1069

1070

Brett Cannon

c33f3f2

2012-04-20 13:23:54 -0400

[diff] [blame]

1071

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1072

class TestTokenize(TestCase):

1073

1074

def test_tokenize(self):

1075

import tokenize as tokenize_module

1076

encoding = object()

1077

encoding_used = None

1078

def mock_detect_encoding(readline):

Serhiy Storchaka

74a49ac

2015-03-20 16:46:19 +0200

[diff] [blame]

1079

return encoding, [b'first', b'second']

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1080

1081

def mock__tokenize(readline, encoding):

1082

nonlocal encoding_used

1083

encoding_used = encoding

1084

out = []

1085

while True:

1086

next_line = readline()

1087

if next_line:

1088

out.append(next_line)

continue

return out

counter = 0

def mock_readline():

nonlocal counter

counter += 1

if counter == 5:

return b''

Serhiy Storchaka

74a49ac

2015-03-20 16:46:19 +0200

[diff] [blame]

1098

return str(counter).encode()

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1099

1100

orig_detect_encoding = tokenize_module.detect_encoding

1101

orig__tokenize = tokenize_module._tokenize

1102

tokenize_module.detect_encoding = mock_detect_encoding

1103

tokenize_module._tokenize = mock__tokenize

1104

try:

1105

results = tokenize(mock_readline)

Serhiy Storchaka

74a49ac

2015-03-20 16:46:19 +0200

[diff] [blame]

1106

self.assertEqual(list(results),

1107

[b'first', b'second', b'1', b'2', b'3', b'4'])

Trent Nelson

2008-03-18 22:41:35 +0000

[diff] [blame]

1108

finally:

1109

tokenize_module.detect_encoding = orig_detect_encoding

1110

tokenize_module._tokenize = orig__tokenize

1111

1112

self.assertTrue(encoding_used, encoding)

Raymond Hettinger

2005-06-10 11:05:19 +0000

[diff] [blame]

1113

Meador Inge

00c7f85

2012-01-19 00:44:45 -0600

[diff] [blame]

1114

def assertExactTypeEqual(self, opstr, *optypes):

1115

tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))

1116

num_optypes = len(optypes)

1117

self.assertEqual(len(tokens), 2 + num_optypes)

1118

self.assertEqual(token.tok_name[tokens[0].exact_type],

1119

token.tok_name[ENCODING])

1120

for i in range(num_optypes):

1121

self.assertEqual(token.tok_name[tokens[i + 1].exact_type],

1122

token.tok_name[optypes[i]])

1123

self.assertEqual(token.tok_name[tokens[1 + num_optypes].exact_type],

1124

token.tok_name[token.ENDMARKER])

1125

1126

def test_exact_type(self):

1127

self.assertExactTypeEqual('()', token.LPAR, token.RPAR)

1128

self.assertExactTypeEqual('[]', token.LSQB, token.RSQB)

1129

self.assertExactTypeEqual(':', token.COLON)

1130

self.assertExactTypeEqual(',', token.COMMA)

1131

self.assertExactTypeEqual(';', token.SEMI)

1132

self.assertExactTypeEqual('+', token.PLUS)

1133

self.assertExactTypeEqual('-', token.MINUS)

1134

self.assertExactTypeEqual('*', token.STAR)

1135

self.assertExactTypeEqual('/', token.SLASH)

1136

self.assertExactTypeEqual('|', token.VBAR)

1137

self.assertExactTypeEqual('&', token.AMPER)

1138

self.assertExactTypeEqual('<', token.LESS)

1139

self.assertExactTypeEqual('>', token.GREATER)

1140

self.assertExactTypeEqual('=', token.EQUAL)

1141

self.assertExactTypeEqual('.', token.DOT)

1142

self.assertExactTypeEqual('%', token.PERCENT)

1143

self.assertExactTypeEqual('{}', token.LBRACE, token.RBRACE)

1144

self.assertExactTypeEqual('==', token.EQEQUAL)

1145

self.assertExactTypeEqual('!=', token.NOTEQUAL)

1146

self.assertExactTypeEqual('<=', token.LESSEQUAL)

1147

self.assertExactTypeEqual('>=', token.GREATEREQUAL)

1148

self.assertExactTypeEqual('~', token.TILDE)

1149

self.assertExactTypeEqual('^', token.CIRCUMFLEX)

1150

self.assertExactTypeEqual('<<', token.LEFTSHIFT)

1151

self.assertExactTypeEqual('>>', token.RIGHTSHIFT)

1152

self.assertExactTypeEqual('**', token.DOUBLESTAR)

1153

self.assertExactTypeEqual('+=', token.PLUSEQUAL)

1154

self.assertExactTypeEqual('-=', token.MINEQUAL)

1155

self.assertExactTypeEqual('*=', token.STAREQUAL)

1156

self.assertExactTypeEqual('/=', token.SLASHEQUAL)

1157

self.assertExactTypeEqual('%=', token.PERCENTEQUAL)

1158

self.assertExactTypeEqual('&=', token.AMPEREQUAL)

1159

self.assertExactTypeEqual('|=', token.VBAREQUAL)

1160

self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)

1161

self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)

1162

self.assertExactTypeEqual('<<=', token.LEFTSHIFTEQUAL)

1163

self.assertExactTypeEqual('>>=', token.RIGHTSHIFTEQUAL)

1164

self.assertExactTypeEqual('**=', token.DOUBLESTAREQUAL)

1165

self.assertExactTypeEqual('//', token.DOUBLESLASH)

1166

self.assertExactTypeEqual('//=', token.DOUBLESLASHEQUAL)

1167

self.assertExactTypeEqual('@', token.AT)

1168

1169

self.assertExactTypeEqual('a**2+b**2==c**2',

1170

NAME, token.DOUBLESTAR, NUMBER,

1171

token.PLUS,

1172

NAME, token.DOUBLESTAR, NUMBER,

1173

token.EQEQUAL,

1174

NAME, token.DOUBLESTAR, NUMBER)

1175

self.assertExactTypeEqual('{1, 2, 3}',

1176

token.LBRACE,

1177

token.NUMBER, token.COMMA,

1178

token.NUMBER, token.COMMA,

1179

token.NUMBER,

1180

token.RBRACE)

1181

self.assertExactTypeEqual('^(x & 0x1)',

1182

token.CIRCUMFLEX,

1183

token.LPAR,

1184

token.NAME, token.AMPER, token.NUMBER,

1185

token.RPAR)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

1186

Ezio Melotti

fafa8b7

2012-11-03 17:46:51 +0200

[diff] [blame]

1187

def test_pathological_trailing_whitespace(self):

1188

# See http://bugs.python.org/issue16152

1189

self.assertExactTypeEqual('@ ', token.AT)

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

1190

Terry Jan Reedy

2014-02-17 16:45:48 -0500

[diff] [blame]

1191

class UntokenizeTest(TestCase):

Terry Jan Reedy

58edfd9

2014-02-17 16:49:06 -0500

[diff] [blame]

1192

Terry Jan Reedy

2014-02-17 16:45:48 -0500

[diff] [blame]

1193

def test_bad_input_order(self):

Terry Jan Reedy

2014-02-23 23:33:08 -0500

[diff] [blame]

1194

# raise if previous row

Terry Jan Reedy

2014-02-17 16:45:48 -0500

[diff] [blame]

u = Untokenizer()

u.prev_row = 2

u.prev_col = 2

with self.assertRaises(ValueError) as cm:

1199

u.add_whitespace((1,3))

Terry Jan Reedy

58edfd9

2014-02-17 16:49:06 -0500

[diff] [blame]

1200

self.assertEqual(cm.exception.args[0],

Terry Jan Reedy

2014-02-17 16:45:48 -0500

[diff] [blame]

1201

'start (1,3) precedes previous end (2,2)')

Terry Jan Reedy

2014-02-23 23:33:08 -0500

[diff] [blame]

1202

# raise if previous column in row

Terry Jan Reedy

2014-02-17 16:45:48 -0500

[diff] [blame]

1203

self.assertRaises(ValueError, u.add_whitespace, (2,1))

1204

Terry Jan Reedy

2014-02-23 23:33:08 -0500

[diff] [blame]

1205

def test_backslash_continuation(self):

1206

# The problem is that <whitespace>\<newline> leaves no token

u = Untokenizer()

u.prev_row = 1

u.prev_col = 1

u.tokens = []

u.add_whitespace((2, 0))

1212

self.assertEqual(u.tokens, ['\\\n'])

1213

u.prev_row = 2

1214

u.add_whitespace((4, 4))

1215

self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', ' '])

1216

self.assertTrue(roundtrip('a\n b\n c\n \\\n c\n'))

1217

Terry Jan Reedy

5b8d2c3

2014-02-17 23:12:16 -0500

[diff] [blame]

1218

def test_iter_compat(self):

1219

u = Untokenizer()

1220

token = (NAME, 'Hello')

1221

tokens = [(ENCODING, 'utf-8'), token]

1222

u.compat(token, iter([]))

1223

self.assertEqual(u.tokens, ["Hello "])

1224

u = Untokenizer()

1225

self.assertEqual(u.untokenize(iter([token])), 'Hello ')

1226

u = Untokenizer()

1227

self.assertEqual(u.untokenize(iter(tokens)), 'Hello ')

1228

self.assertEqual(u.encoding, 'utf-8')

1229

self.assertEqual(untokenize(iter(tokens)), b'Hello ')

1230

Terry Jan Reedy

2014-02-17 16:45:48 -0500

[diff] [blame]

1231

Jason R. Coombs

2015-06-20 19:52:22 -0400

[diff] [blame]

1232

class TestRoundtrip(TestCase):

1233

def roundtrip(self, code):

1234

if isinstance(code, str):

1235

code = code.encode('utf-8')

Jason R. Coombs

b6d1cdd

2015-06-25 22:42:24 -0400

[diff] [blame]

1236

return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')

Jason R. Coombs

2015-06-20 19:52:22 -0400

[diff] [blame]

1237

1238

def test_indentation_semantics_retained(self):

1239

"""

1240

Ensure that although whitespace might be mutated in a roundtrip,

1241

the semantic meaning of the indentation remains consistent.

1242

"""

1243

code = "if False:\n\tx=3\n\tx=3\n"

Jason R. Coombs

b6d1cdd

2015-06-25 22:42:24 -0400

[diff] [blame]

1244

codelines = self.roundtrip(code).split('\n')

Jason R. Coombs

2015-06-20 19:52:22 -0400

[diff] [blame]

1245

self.assertEqual(codelines[1], codelines[2])

1246

1247

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

1248

__test__ = {"doctests" : doctests, 'decistmt': decistmt}

1249

Thomas Wouters

2006-04-21 10:40:58 +0000

[diff] [blame]

1250

def test_main():

Christian Heimes

2008-03-16 00:07:10 +0000

[diff] [blame]

1251

from test import test_tokenize

Benjamin Peterson

ee8712c

2008-05-20 21:35:26 +0000

[diff] [blame]

1252

support.run_doctest(test_tokenize, True)

1253

support.run_unittest(TestTokenizerAdheresToPep0263)

1254

support.run_unittest(Test_Tokenize)

1255

support.run_unittest(TestDetectEncoding)

1256

support.run_unittest(TestTokenize)

Terry Jan Reedy

2014-02-17 16:45:48 -0500

[diff] [blame]

1257

support.run_unittest(UntokenizeTest)

Jason R. Coombs

2015-06-20 19:52:22 -0400

[diff] [blame]

1258

support.run_unittest(TestRoundtrip)

Neal Norwitz

c150536

2006-12-28 06:47:50 +0000

[diff] [blame]

1259

Thomas Wouters