Blame - Lib/idlelib/pyparse.py - platform/external/python/cpython3

2018-02-21 22:48:36 -0500

[diff] [blame]

1

"""Define partial Python code Parser used by editor and hyperparser.

2

Cheryl Sabella

2018-02-28 17:23:58 -0500

[diff] [blame]

3

Instances of ParseMap are used with str.translate.

Cheryl Sabella

2018-02-21 22:48:36 -0500

[diff] [blame]

4

5

The following bound search and match functions are defined:

6

_synchre - start of popular statement;

7

_junkre - whitespace or comment line;

8

_match_stringre - string, possibly without closer;

9

_itemre - line that may have bracket structure start;

10

_closere - line that must be followed by dedent;

11

_chew_ordinaryre - non-special characters.

12

"""

David Scherer

2000-08-15 01:13:23 +0000

[diff] [blame]

import re

import sys

Cheryl Sabella

2018-02-21 22:48:36 -0500

[diff] [blame]

16

# Continuation kinds: why the last statement is continued onto the next
# line.  C_NONE means the last statement is not continued at all.
C_NONE = 0
C_BACKSLASH = 1
C_STRING_FIRST_LINE = 2
C_STRING_NEXT_LINES = 3
C_BRACKET = 4

David Scherer

2000-08-15 01:13:23 +0000

[diff] [blame]

19

Cheryl Sabella

2018-02-21 22:48:36 -0500

[diff] [blame]

20

# Bound search function: locate a line whose first token is one of a fixed
# set of statement keywords.  MULTILINE lets '^' match at every line start.

_synchre = re.compile(
    r"""
    ^
    [ \t]*
    (?: while
    |   else
    |   def
    |   return
    |   assert
    |   break
    |   class
    |   continue
    |   elif
    |   try
    |   except
    |   raise
    |   import
    |   yield
    )
    \b
""",
    flags=re.VERBOSE | re.MULTILINE,
).search

42

43

# Bound match function: succeeds on a line that is only whitespace, or a
# comment starting in the first non-blank column whose '#' is immediately
# followed by a non-whitespace character.

_junkre = re.compile(
    r"""
    [ \t]*
    (?: \# \S .* )?
    \n
""",
    flags=re.VERBOSE,
).match

50

51

# Bound match function for a string literal in any of the four quoting
# styles.  The closing quote is optional, so text that is still being
# typed (an unterminated string) is matched as well.

_match_stringre = re.compile(
    r"""
    \""" [^"\\]* (?:
                     (?: \\. | "(?!"") )
                     [^"\\]*
                 )*
    (?: \""" )?

|   " [^"\\\n]* (?: \\. [^"\\\n]* )* "?

|   ''' [^'\\]* (?:
                   (?: \\. | '(?!'') )
                   [^'\\]*
                )*
    (?: ''' )?

|   ' [^'\\\n]* (?: \\. [^'\\\n]* )* '?
""",
    flags=re.VERBOSE | re.DOTALL,
).match

71

72

# Bound match function: skip leading blanks/tabs and match the first
# character that is not whitespace, '#', or a backslash.  It is used to
# locate the first item inside a bracket structure.

_itemre = re.compile(
    r"""
    [ \t]*
    [^\s#\\]    # if we match, m.end()-1 is the interesting char
""",
    flags=re.VERBOSE,
).match

79

Cheryl Sabella

2018-02-21 22:48:36 -0500

[diff] [blame]

80

# Bound match function: succeeds when the text starts (after optional
# whitespace) with a keyword whose statement should be followed by a dedent.

_closere = re.compile(
    r"""
    \s*
    (?: return
    |   break
    |   continue
    |   raise
    |   pass
    )
    \b
""",
    flags=re.VERBOSE,
).match

92

93

# Bound match function: consume a run of characters that are none of the
# brackets, '#', the quote characters, or backslash.  On success, m.end()-1
# is the index of the last such character; on failure the text begins with
# one of the special characters.

_chew_ordinaryre = re.compile(
    r"""
    [^[\](){}#'"\\]+
""",
    flags=re.VERBOSE,
).match

101

David Scherer

2000-08-15 01:13:23 +0000

[diff] [blame]

102

Cheryl Sabella

2018-02-28 17:23:58 -0500

[diff] [blame]

103

class ParseMap(dict):
    r"""Translation table whose missing keys all map to 'x'.

    Intended as the table argument of str.translate (see _study1):
    every code point without an explicit entry is translated to 'x'.

    Example: keep whitespace, turn everything else into 'x'.

    >>> keepwhite = ParseMap((ord(c), ord(c)) for c in ' \t\n\r')
    >>> "a + b\tc\nd".translate(keepwhite)
    'x x x\tx\nx'
    """

    def __missing__(self, key):
        # Going through this hook triples access time (bpo-32940), so
        # frequently used keys are better stored in the dict explicitly.
        return 120  # ord('x')

Tal Einat

2014-07-16 16:33:36 +0300

[diff] [blame]

117

Tal Einat

2014-07-16 16:33:36 +0300

[diff] [blame]

118

Cheryl Sabella

2018-02-28 17:23:58 -0500

[diff] [blame]

119

# Translation table used by Parser._study1.  Every ASCII code point is
# stored explicitly (as 120, i.e. 'x') so that ParseMap.__missing__ is
# only reached for non-ASCII text; the interesting characters are then
# overridden.
trans = ParseMap.fromkeys(range(128), 120)
# Every kind of open bracket becomes '('.
trans.update(dict.fromkeys(map(ord, "({["), ord('(')))
# Every kind of close bracket becomes ')'.
trans.update(dict.fromkeys(map(ord, ")}]"), ord(')')))
# Quotes, backslash, newline and '#' are kept as they are.
trans.update({ord(c): ord(c) for c in "\"'\\\n#"})

Tal Einat

2014-07-16 16:33:36 +0300

[diff] [blame]

124

David Scherer

2000-08-15 01:13:23 +0000

[diff] [blame]

class Parser:

def __init__(self, indentwidth, tabwidth):
    """Store the indentation settings used by the indent computations.

    indentwidth is the extra amount added by compute_bracket_indent when
    nothing follows the open bracket; tabwidth is passed to
    str.expandtabs when measuring text widths.
    """
    self.indentwidth, self.tabwidth = indentwidth, tabwidth

131

Cheryl Sabella

2018-02-23 21:35:27 -0500

[diff] [blame]

132

def set_code(self, s):
    """Install s as the text to analyze and discard earlier analysis.

    s must be empty or end with a newline.
    """
    assert not s or s[-1] == '\n'
    # Level 0 means neither _study1 nor _study2 has run on this text.
    self.code, self.study_level = s, 0

136

Kurt B. Kaiser

2005-11-18 22:05:48 +0000

[diff] [blame]

137

def find_good_parse_start(self, is_char_in_string=None,
                          _synchre=_synchre):
    """Return the index of a good place to begin parsing, or None.

    The result is the start of a line that _synchre recognizes as the
    beginning of a popular statement (such as "def" or "while"), chosen
    as close to the end of the text as possible.  When None is returned
    the caller should supply more preceding context if it can; if the
    whole program text up to the point of interest was already tried,
    it should pass 0 to set_lo().

    is_char_in_string is called with an index into the text and tells
    whether that character is inside a string.  The result is only
    reliable if a "no" answer from it is absolutely guaranteed to be
    correct.  Without such a function None is returned immediately.
    """
    text = self.code
    if not is_char_in_string:
        # No way to validate candidates: make the caller pass everything.
        return None

    # Look backwards from the end, at most five times, for a line that
    # ends with a colon and contains a statement start that is not
    # inside a string.
    start = None
    upper = len(text)
    for _ in range(5):
        colon = text.rfind(":\n", 0, upper)
        if colon < 0:
            break
        # Start of the line holding the colon (rfind's -1 becomes 0).
        line_start = text.rfind('\n', 0, colon) + 1
        hit = _synchre(text, line_start, upper)
        if hit and not is_char_in_string(hit.start()):
            start = hit.start()
            break
        upper = line_start

    if start is None:
        # Nothing usable was found looking backwards: either no line
        # looks like a block opener, or every candidate is reported to
        # be inside a string.  Make one last attempt from the beginning
        # of the text and return whatever that gives.
        hit = _synchre(text)
        if hit and not is_char_in_string(hit.start()):
            return hit.start()
        return None

    # Looking backwards worked; now move forward, keeping the last
    # candidate that is not inside a string, until _synchre stops
    # matching.
    scan_from = start + 1
    while True:
        hit = _synchre(text, scan_from)
        if not hit:
            return start
        candidate, scan_from = hit.span()
        if not is_char_in_string(candidate):
            start = candidate

David Scherer

2000-08-15 01:13:23 +0000

[diff] [blame]

198

def set_lo(self, lo):
    """Drop the first lo characters of the text.

    Meant to be called with the result of find_good_parse_start(); lo
    must be 0 or the index just after a newline.
    """
    assert lo == 0 or self.code[lo-1] == '\n'
    if lo <= 0:
        return
    self.code = self.code[lo:]

David Scherer

2000-08-15 01:13:23 +0000

[diff] [blame]

206

Kurt B. Kaiser

254eb53

2002-09-17 03:55:13 +0000

[diff] [blame]

207

def _study1(self):
    """Find the line numbers of non-continuation lines.

    Sets self.goodlines, a list of 0-based line numbers starting with 0
    and ending with the final line number as a sentinel, and
    self.continuation, one of the C_* constants telling whether and why
    the last statement is continued.  Does nothing if this level of
    study was already done.
    """
    if self.study_level >= 1:
        return
    self.study_level = 1

    # Translate every uninteresting character to "x", every open bracket
    # to "(" and every close bracket to ")", then collapse runs of "x".
    # This shrinks the text a lot and so speeds up the loop below.
    squashed = self.code.translate(trans)
    for run in ('xxxxxxxx', 'xxxx', 'xx', 'xx'):
        squashed = squashed.replace(run, 'x')
    # Only "\nx" is collapsed; collapsing "x\n" would be wrong because
    # the "x" may be preceded by a backslash.
    squashed = squashed.replace('\nx', '\n')

    # Walk the squashed text, collecting the line numbers at which
    # non-continued statements start and working out whether and why
    # the last statement is continued.
    continuation = C_NONE
    depth = 0       # bracket nesting level
    lineno = 0      # current line number
    goodlines = [0]
    self.goodlines = goodlines
    pos, size = 0, len(squashed)
    while pos < size:
        ch = squashed[pos]
        pos += 1

        # Cases are tested in decreasing order of frequency.
        if ch == 'x':
            pass

        elif ch == '\n':
            lineno += 1
            if depth == 0:
                goodlines.append(lineno)
            # Otherwise we are inside an unclosed bracket structure.

        elif ch == '(':
            depth += 1

        elif ch == ')':
            # A close bracket at depth 0 means the program is invalid,
            # but there is nobody to complain to.
            if depth:
                depth -= 1

        elif ch == '"' or ch == "'":
            # Consume the string.
            quote = ch
            if squashed[pos-1:pos+2] == quote * 3:
                quote = quote * 3
            first_line = lineno
            extra = len(quote) - 1      # 0 for single, 2 for triple quotes
            pos += extra
            while pos < size:
                ch = squashed[pos]
                pos += 1

                if ch == 'x':
                    continue

                if squashed[pos-1:pos+extra] == quote:
                    # Closing quote found.
                    pos += extra
                    break

                if ch == '\n':
                    lineno += 1
                    if extra == 0:
                        # Unterminated single-quoted string.
                        if depth == 0:
                            goodlines.append(lineno)
                        break
                elif ch == '\\':
                    assert pos < size
                    if squashed[pos] == '\n':
                        lineno += 1
                    pos += 1
                # Anything else is a comment char or bracket inside
                # the string and is ignored.
            else:
                # The text ran out before the string was closed, so we
                # are still inside it.
                if (lineno - 1) == first_line:
                    # Before the previous newline we were still on the
                    # first line of the string.
                    continuation = C_STRING_FIRST_LINE
                else:
                    continuation = C_STRING_NEXT_LINES

        elif ch == '#':
            # Skip the comment; the newline itself is handled above.
            pos = squashed.find('\n', pos)
            assert pos >= 0

        else:
            assert ch == '\\'
            assert pos < size
            if squashed[pos] == '\n':
                lineno += 1
                if pos + 1 == size:
                    continuation = C_BACKSLASH
            pos += 1

    # The last statement may be continued for all three reasons.  String
    # continuation takes precedence over bracket continuation, which in
    # turn beats backslash continuation.
    in_string = continuation in (C_STRING_FIRST_LINE, C_STRING_NEXT_LINES)
    if depth > 0 and not in_string:
        continuation = C_BRACKET
    self.continuation = continuation

    # Push the final line number as a sentinel value, whether or not the
    # last statement is continued.
    assert (continuation == C_NONE) == (goodlines[-1] == lineno)
    if goodlines[-1] != lineno:
        goodlines.append(lineno)

340

341

def get_continuation_type(self):
    """Return the C_* constant saying why the last statement is continued.

    Runs _study1 first so that self.continuation is available.
    """
    self._study1()
    kind = self.continuation
    return kind

344

Kurt B. Kaiser

254eb53

2002-09-17 03:55:13 +0000

[diff] [blame]

345

def _study2(self):
    """Analyze the last interesting statement character by character.

    _study1 is enough to determine the continuation status; everything
    else needs a closer look at the last statement that is not just a
    blank or non-indenting comment line.  Does nothing if this level of
    study was already done.

    Creates:
        self.stmt_start, self.stmt_end
            slice indices of the last interesting statement
        self.stmt_bracketing
            tuple of (index, nesting level) pairs, one for the start of
            the statement and one for every position where the level
            changes.  Strings and comments count as one extra level.
            For the code "say(boo) or die\\n" this is
            ((0, 0), (3, 1), (8, 0)).
        self.lastch
            last interesting character before an optional trailing
            comment
        self.lastopenbracketpos
            index of the last still-open bracket, or None if there is
            none
    """
    if self.study_level >= 2:
        return
    self._study1()
    self.study_level = 2

    # Find the slice [start:end] of the last interesting statement by
    # walking backwards through the statements recorded in goodlines.
    code = self.code
    goodlines = self.goodlines
    idx = len(goodlines) - 1    # index of the newest entry
    start = len(code)           # end of the statement at goodlines[idx]
    while idx:
        assert start
        end = start
        # Step back one physical line for every line between the two
        # neighbouring goodlines entries.
        for _ in range(goodlines[idx-1], goodlines[idx]):
            # Tricky: yields 0 when there is no preceding newline.
            start = code.rfind('\n', 0, start-1) + 1
        # code[start:end] is not a continuation, but it may be blank or
        # a non-indenting comment line; if so keep going backwards.
        if not _junkre(code, start):
            break
        idx -= 1
    if idx == 0:
        # Nothing but junk.
        assert start == 0
        end = start
    self.stmt_start, self.stmt_end = start, end

    # Scan the statement for the last open bracket (if any) and the
    # last interesting character (if any).
    lastch = ""
    open_brackets = []          # indices of currently open brackets
    bracketing = [(start, 0)]
    pos = start
    while pos < end:
        # Swallow everything except ()[]{}'"#\
        boring = _chew_ordinaryre(code, pos, end)
        if boring:
            # At least one boring character was skipped.
            after = boring.end()
            # Back up over trailing whitespace to find the last
            # non-blank boring character.
            back = after - 1
            while back >= pos and code[back] in " \t\n":
                back -= 1
            if back >= pos:
                lastch = code[back]
            pos = after
            if pos >= end:
                break

        ch = code[pos]

        if ch in "([{":
            open_brackets.append(pos)
            bracketing.append((pos, len(open_brackets)))
            lastch = ch
            pos += 1

        elif ch in ")]}":
            if open_brackets:
                open_brackets.pop()
            lastch = ch
            pos += 1
            bracketing.append((pos, len(open_brackets)))

        elif ch == '"' or ch == "'":
            # Consume the string with a regexp: the string may be huge
            # and, unlike _study1, we need not count newlines here.
            bracketing.append((pos, len(open_brackets)+1))
            lastch = ch
            pos = _match_stringre(code, pos, end).end()
            bracketing.append((pos, len(open_brackets)))

        elif ch == '#':
            # Consume the comment and its trailing newline.
            bracketing.append((pos, len(open_brackets)+1))
            pos = code.find('\n', pos, end) + 1
            assert pos > 0
            bracketing.append((pos, len(open_brackets)))

        else:
            assert ch == '\\'
            pos += 1    # beyond the backslash
            assert pos < end
            if code[pos] != '\n':
                # The program is invalid, but we cannot complain.
                lastch = ch + code[pos]
            pos += 1    # beyond the escaped character

    self.lastch = lastch
    self.lastopenbracketpos = open_brackets[-1] if open_brackets else None
    self.stmt_bracketing = tuple(bracketing)

David Scherer

2000-08-15 01:13:23 +0000

[diff] [blame]

467

Kurt B. Kaiser

254eb53

2002-09-17 03:55:13 +0000

[diff] [blame]

468

def compute_bracket_indent(self):
    """Return the number of spaces the next line should be indented.

    The continuation type must be C_BRACKET.  If an item follows the
    last open bracket, the result is the column of that item; otherwise
    it is the indentation of the bracket's line plus self.indentwidth.
    Tabs are expanded with self.tabwidth.
    """
    self._study2()
    assert self.continuation == C_BRACKET
    bracket = self.lastopenbracketpos
    text = self.code
    size = len(text)
    bracket_line = text.rfind('\n', 0, bracket) + 1
    line_start = bracket_line
    probe = bracket + 1     # one beyond the open bracket

    # Look for the first item after the bracket, keeping line_start at
    # the beginning of the line being examined.
    while probe < size:
        item = _itemre(text, probe)
        if item:
            first = item.end() - 1      # index of first interesting char
            return len(text[line_start:first].expandtabs(self.tabwidth))
        # This line is junk; move on to the next one.
        line_start = probe = text.find('\n', probe) + 1

    # Nothing interesting follows the bracket: reproduce the bracket
    # line's own indentation and add one level.
    indent_end = bracket_line
    while text[indent_end] in " \t":
        indent_end += 1
    prefix = text[bracket_line:indent_end]
    return len(prefix.expandtabs(self.tabwidth)) + self.indentwidth

David Scherer

2000-08-15 01:13:23 +0000

[diff] [blame]

498

David Scherer

2000-08-15 01:13:23 +0000

[diff] [blame]

499

def get_num_lines_in_stmt(self):
    """Return the number of physical lines in the last statement.

    The statement need not be an interesting one.  Intended for use
    when the continuation type is C_BACKSLASH.
    """
    self._study1()
    lines = self.goodlines
    newest = lines[-1]
    previous = lines[-2]
    return newest - previous

508

David Scherer

2000-08-15 01:13:23 +0000

[diff] [blame]

509

def compute_backslash_indent(self):
    """Return the number of spaces the next line should be indented.

    The continuation type must be C_BACKSLASH, and the new line is
    assumed to be the first one after the initial line of the
    statement.  If the initial line contains an assignment '=' outside
    brackets that is followed by more than whitespace and a backslash,
    the result is one more than the width of the line up to and
    including that '='.  Otherwise it is one more than the width of the
    line up to the end of its first run of non-whitespace characters.
    """
    self._study2()
    assert self.continuation == C_BACKSLASH
    text = self.code
    first = self.stmt_start
    while text[first] in " \t":
        first += 1

    # See whether the initial line starts an assignment statement,
    # i.e. look for an '=' operator outside any brackets.
    line_end = text.find('\n', first) + 1
    pos = first
    depth = 0
    saw_assign = False
    while pos < line_end:
        ch = text[pos]
        if ch in "([{":
            depth += 1
        elif ch in ")]}":
            if depth:
                depth -= 1
        elif ch == '"' or ch == "'":
            # Jump over the whole string.
            pos = _match_stringre(text, pos, line_end).end()
            continue
        elif ch == '#':
            # This line is unreachable because the # makes a comment of
            # everything after it.
            break
        elif (depth == 0 and ch == '='
              and (pos == 0 or text[pos-1] not in "=<>!")
              and text[pos+1] != '='):
            saw_assign = True
            break
        pos += 1

    if saw_assign:
        # A legitimate '=' was found, but it may be the last
        # interesting thing on the line.
        pos += 1    # move beyond the '='
        if re.match(r"\s*\\", text[pos:line_end]) is not None:
            saw_assign = False

    if not saw_assign:
        # Settle for moving beyond the first chunk of non-whitespace
        # characters.
        pos = first
        while text[pos] not in " \t\n":
            pos += 1

    return len(text[self.stmt_start:pos].expandtabs(self.tabwidth)) + 1

565

David Scherer

2000-08-15 01:13:23 +0000

[diff] [blame]

566

def get_base_indent_string(self):
    """Return the leading blanks and tabs of the last interesting
    statement's initial line.
    """
    self._study2()
    begin, end = self.stmt_start, self.stmt_end
    stmt = self.code[begin:end]
    # Whatever lstrip removes is exactly the leading run of " " and "\t".
    indent_len = len(stmt) - len(stmt.lstrip(" \t"))
    return stmt[:indent_len]

David Scherer

2000-08-15 01:13:23 +0000

[diff] [blame]

577

David Scherer

2000-08-15 01:13:23 +0000

[diff] [blame]

578

def is_block_opener(self):
    """Return True if the last interesting statement opens a block.

    That is the case when its last interesting character is a colon.
    """
    self._study2()
    final_char = self.lastch
    return final_char == ':'

582

David Scherer

2000-08-15 01:13:23 +0000

[diff] [blame]

583

def is_block_closer(self):
    """Return True if the last interesting statement closes a block.

    That is the case when _closere matches at the statement's start.
    """
    self._study2()
    closer = _closere(self.code, self.stmt_start)
    return closer is not None

David Scherer

2000-08-15 01:13:23 +0000

[diff] [blame]

587

Kurt B. Kaiser

2005-11-18 22:05:48 +0000

[diff] [blame]

588

def get_last_stmt_bracketing(self):
    """Return the bracketing structure of the last interesting statement.

    The result is the tuple that _study2() stores in
    self.stmt_bracketing; see there for its format.
    """
    self._study2()
    bracketing = self.stmt_bracketing
    return bracketing

Cheryl Sabella