Blame - Lib/idlelib/HyperParser.py - platform/external/python/cpython3

2014-06-16 19:01:01 -0400

[diff] [blame]

1

"""Provide advanced parsing abilities for ParenMatch and other extensions.

Terry Jan Reedy

2014-06-16 02:33:35 -0400

[diff] [blame]

2

3

HyperParser uses PyParse. PyParse mostly gives information on the

4

proper indentation of code. HyperParser gives additional information on

5

the structure of code.

Kurt B. Kaiser

2005-11-18 22:05:48 +0000

[diff] [blame]

6

"""

7

8

import string

Tal Einat

2014-07-16 16:33:36 +0300

[diff] [blame]

9

from keyword import iskeyword

Kurt B. Kaiser

2d7f6a0

2007-08-22 23:01:33 +0000

[diff] [blame]

10

from idlelib import PyParse

Kurt B. Kaiser

2005-11-18 22:05:48 +0000

[diff] [blame]

11

Kurt B. Kaiser

2005-11-18 22:05:48 +0000

[diff] [blame]

12

Tal Einat

2014-07-16 16:33:36 +0300

[diff] [blame]

13

# All ASCII chars that may be in an identifier.
_ASCII_ID_CHARS = frozenset(string.ascii_letters + string.digits + "_")
# All ASCII chars that may be the first char of an identifier.
_ASCII_ID_FIRST_CHARS = frozenset(string.ascii_letters + "_")

# Lookup table, indexed by code point (0..127), for whether a 7-bit ASCII
# char is valid in a Python identifier.
_IS_ASCII_ID_CHAR = [(chr(x) in _ASCII_ID_CHARS) for x in range(128)]
# Lookup table, indexed by code point (0..127), for whether a 7-bit ASCII
# char is valid as the first char in a Python identifier.
_IS_ASCII_ID_FIRST_CHAR = [
    (chr(x) in _ASCII_ID_FIRST_CHARS) for x in range(128)
]

24

25

26

class HyperParser:
    """Give structural information about the statement around a text index.

    The constructor feeds the text that ends at the index's line to
    PyParse.Parser and stores the resulting raw text (rawtext) and the
    bracketing of the last statement (bracketing, a sequence of
    (position-in-rawtext, nesting-level) pairs).  The query methods work
    on that stored state relative to the index chosen with set_index().
    """

    def __init__(self, editwin, index):
        """To initialize, analyze the surroundings of the given index.

        editwin is read for: text, indentwidth, tabwidth, context_use_ps1,
        num_context_lines and _build_char_in_string_func().  index is an
        index of editwin.text inside the statement to analyze.
        """
        self.editwin = editwin
        self.text = text = editwin.text

        parser = PyParse.Parser(editwin.indentwidth, editwin.tabwidth)

        # Keep only the integer part of the normalized index, i.e. the
        # line number that is used below to build "<line>.0"/"<line>.end".
        lno = int(float(text.index(index)))

        if not editwin.context_use_ps1:
            # Try growing amounts of context until the parser finds a
            # good place to start, or the start of the text is reached.
            for context in editwin.num_context_lines:
                startat = max(lno - context, 1)
                startatindex = repr(startat) + ".0"
                stopatindex = "%d.end" % lno
                # We add the newline because PyParse requires a newline
                # at end. We add a space so that index won't be at end
                # of line, so that its status will be the same as the
                # char before it, if should.
                parser.set_str(text.get(startatindex, stopatindex)+' \n')
                bod = parser.find_good_parse_start(
                          editwin._build_char_in_string_func(startatindex))
                if bod is not None or startat == 1:
                    break
            parser.set_lo(bod or 0)
        else:
            # Start right after the previous range tagged "console",
            # or at the very beginning if there is none.
            r = text.tag_prevrange("console", index)
            if r:
                startatindex = r[1]
            else:
                startatindex = "1.0"
            stopatindex = "%d.end" % lno
            # We add the newline because PyParse requires it. We add a
            # space so that index won't be at end of line, so that its
            # status will be the same as the char before it, if should.
            parser.set_str(text.get(startatindex, stopatindex)+' \n')
            parser.set_lo(0)

        # We want what the parser has, minus the last newline and space.
        self.rawtext = parser.str[:-2]
        # Parser.str apparently preserves the statement we are in, so
        # that stopatindex can be used to synchronize the string with
        # the text box indices.
        self.stopatindex = stopatindex
        self.bracketing = parser.get_last_stmt_bracketing()
        # Find which pairs of bracketing are openers, i.e. entries whose
        # level is higher than that of the previous entry. These always
        # correspond to a character of rawtext.
        self.isopener = [i>0 and self.bracketing[i][1] >
                         self.bracketing[i-1][1]
                         for i in range(len(self.bracketing))]

        self.set_index(index)

    def set_index(self, index):
        """Set the index to which the functions relate.

        The index must be in the same statement.

        Sets self.indexinrawtext (offset of index in rawtext) and
        self.indexbracket (position in self.bracketing of the bracket
        to which index belongs).  Raises ValueError if index lies before
        the start of rawtext.
        """
        # rawtext ends at stopatindex, so the offset of index is found
        # by subtracting the length of the text between the two.
        indexinrawtext = (len(self.rawtext) -
                          len(self.text.get(index, self.stopatindex)))
        if indexinrawtext < 0:
            raise ValueError("Index %s precedes the analyzed statement"
                             % index)
        self.indexinrawtext = indexinrawtext
        # find the rightmost bracket to which index belongs
        self.indexbracket = 0
        while (self.indexbracket < len(self.bracketing)-1 and
               self.bracketing[self.indexbracket+1][0] < self.indexinrawtext):
            self.indexbracket += 1
        # An entry located exactly at the index is also taken when it
        # is not an opener.
        if (self.indexbracket < len(self.bracketing)-1 and
                self.bracketing[self.indexbracket+1][0] ==
                self.indexinrawtext and
                not self.isopener[self.indexbracket+1]):
            self.indexbracket += 1

    def is_in_string(self):
        """Is the index given to the HyperParser in a string?"""
        # The bracket to which we belong should be an opener.
        # If it's an opener, it has to have a character.
        return (self.isopener[self.indexbracket] and
                self.rawtext[self.bracketing[self.indexbracket][0]]
                in ('"', "'"))

    def is_in_code(self):
        """Is the index given to the HyperParser in normal code?"""
        # Not in code only when the enclosing opener starts a comment
        # or a string.
        return (not self.isopener[self.indexbracket] or
                self.rawtext[self.bracketing[self.indexbracket][0]]
                not in ('#', '"', "'"))

    def get_surrounding_brackets(self, openers='([{', mustclose=False):
        """Return bracket indexes or None.

        If the index given to the HyperParser is surrounded by a
        bracket defined in openers (or at least has one before it),
        return the indices of the opening bracket and the closing
        bracket (or the end of line, whichever comes first).

        If it is not surrounded by brackets, or the end of line comes
        before the closing bracket and mustclose is True, returns None.
        """
        bracketinglevel = self.bracketing[self.indexbracket][1]
        before = self.indexbracket
        # Walk backwards to the nearest enclosing opener that is one of
        # the requested kinds.
        while (not self.isopener[before] or
               self.rawtext[self.bracketing[before][0]] not in openers or
               self.bracketing[before][1] > bracketinglevel):
            before -= 1
            if before < 0:
                return None
            bracketinglevel = min(bracketinglevel, self.bracketing[before][1])
        # Walk forwards to the first entry whose level drops below the
        # level of that opener.
        after = self.indexbracket + 1
        while (after < len(self.bracketing) and
               self.bracketing[after][1] >= bracketinglevel):
            after += 1

        # Offsets in rawtext are turned into text indices by counting
        # characters backwards from stopatindex.
        beforeindex = self.text.index("%s-%dc" %
            (self.stopatindex, len(self.rawtext)-self.bracketing[before][0]))
        if (after >= len(self.bracketing) or
                self.bracketing[after][0] > len(self.rawtext)):
            if mustclose:
                return None
            afterindex = self.stopatindex
        else:
            # We are after a real char, so it is a ')' and we give the
            # index before it.
            afterindex = self.text.index(
                "%s-%dc" % (self.stopatindex,
                 len(self.rawtext)-(self.bracketing[after][0]-1)))

        return beforeindex, afterindex

    # the set of built-in identifiers which are also keywords,
    # i.e. keyword.iskeyword() returns True for them
    _ID_KEYWORDS = frozenset({"True", "False", "None"})

    @classmethod
    def _eat_identifier(cls, str, limit, pos):
        """Given a string and pos, return the number of chars in the
        identifier which ends at pos, or 0 if there is no such one.

        Characters before limit are never examined.  Keywords are not
        considered identifiers here, except for True, False and None.
        """
        is_ascii_id_char = _IS_ASCII_ID_CHAR

        # Start at the end (pos) and work backwards.
        i = pos

        # Go backwards as long as the characters are valid ASCII
        # identifier characters. This is an optimization, since it
        # is faster in the common case where most of the characters
        # are ASCII.
        while i > limit and (
                ord(str[i - 1]) < 128 and
                is_ascii_id_char[ord(str[i - 1])]
        ):
            i -= 1

        # If the above loop ended due to reaching a non-ASCII
        # character, continue going backwards using the most generic
        # test for whether a string contains only valid identifier
        # characters.
        if i > limit and ord(str[i - 1]) >= 128:
            # 'a' is prepended so that the test does not depend on the
            # candidate's own first character.
            while i - 4 >= limit and ('a' + str[i - 4:pos]).isidentifier():
                i -= 4
            if i - 2 >= limit and ('a' + str[i - 2:pos]).isidentifier():
                i -= 2
            if i - 1 >= limit and ('a' + str[i - 1:pos]).isidentifier():
                i -= 1

            # The identifier candidate starts here. If it isn't a valid
            # identifier, don't eat anything. At this point that is only
            # possible if the first character isn't a valid first
            # character for an identifier.
            if not str[i:pos].isidentifier():
                return 0
        elif i < pos:
            # All characters in str[i:pos] are valid ASCII identifier
            # characters, so it is enough to check that the first is
            # valid as the first character of an identifier.
            if not _IS_ASCII_ID_FIRST_CHAR[ord(str[i])]:
                return 0

        # All keywords are valid identifiers, but should not be
        # considered identifiers here, except for True, False and None.
        if i < pos and (
                iskeyword(str[i:pos]) and
                str[i:pos] not in cls._ID_KEYWORDS
        ):
            return 0

        return pos - i

    # This string includes all chars that may be in a white space
    _whitespace_chars = " \t\n\\"

    def get_expression(self):
        """Return a string with the Python expression which ends at the
        given index, which is empty if there is no real one.

        Raises ValueError if the index is not in code (see is_in_code()).
        """
        if not self.is_in_code():
            raise ValueError("get_expression should only be called "
                             "if index is inside a code.")

        rawtext = self.rawtext
        bracketing = self.bracketing

        brck_index = self.indexbracket
        brck_limit = bracketing[brck_index][0]
        pos = self.indexinrawtext

        last_identifier_pos = pos
        # True while an identifier (or a closing bracket) is expected
        # next; False right after an identifier, when only a dot lets
        # the search continue.
        postdot_phase = True

        while True:
            # Eat whitespaces, comments, and if postdot_phase is False - a dot
            while True:
                if pos>brck_limit and rawtext[pos-1] in self._whitespace_chars:
                    # Eat a whitespace
                    pos -= 1
                elif (not postdot_phase and
                      pos > brck_limit and rawtext[pos-1] == '.'):
                    # Eat a dot
                    pos -= 1
                    postdot_phase = True
                # The next line will fail if we are *inside* a comment,
                # but we shouldn't be.
                elif (pos == brck_limit and brck_index > 0 and
                      rawtext[bracketing[brck_index-1][0]] == '#'):
                    # Eat a comment
                    brck_index -= 2
                    brck_limit = bracketing[brck_index][0]
                    pos = bracketing[brck_index+1][0]
                else:
                    # If we didn't eat anything, quit.
                    break

            if not postdot_phase:
                # We didn't find a dot, so the expression end at the
                # last identifier pos.
                break

            ret = self._eat_identifier(rawtext, brck_limit, pos)
            if ret:
                # There is an identifier to eat
                pos = pos - ret
                last_identifier_pos = pos
                # Now, to continue the search, we must find a dot.
                postdot_phase = False
                # (the loop continues now)

            elif pos == brck_limit:
                # We are at a bracketing limit. If it is a closing
                # bracket, eat the bracket, otherwise, stop the search.
                level = bracketing[brck_index][1]
                while brck_index > 0 and bracketing[brck_index-1][1] > level:
                    brck_index -= 1
                if bracketing[brck_index][0] == brck_limit:
                    # We were not at the end of a closing bracket
                    break
                pos = bracketing[brck_index][0]
                brck_index -= 1
                brck_limit = bracketing[brck_index][0]
                last_identifier_pos = pos
                if rawtext[pos] in "([":
                    # [] and () may be used after an identifier, so we
                    # continue. postdot_phase is True, so we don't allow a dot.
                    pass
                else:
                    # We can't continue after other types of brackets
                    if rawtext[pos] in "'\"":
                        # Scan a string prefix
                        while pos > 0 and rawtext[pos - 1] in "rRbBuU":
                            pos -= 1
                        last_identifier_pos = pos
                    break

            else:
                # We've found an operator or something.
                break

        return rawtext[last_identifier_pos:self.indexinrawtext]

Terry Jan Reedy