"""Define partial Python code Parser used by editor and hyperparser.

Instances of ParseMap are used with str.translate.

The following bound search and match functions are defined:
_synchre - start of popular statement;
_junkre - whitespace or comment line;
_match_stringre - string, possibly without closer;
_itemre - line that may have bracket structure start;
_closere - line that must be followed by dedent;
_chew_ordinaryre - non-special characters.
"""
import re

# Reason last statement is continued (or C_NONE if it's not).
#   C_BACKSLASH          - the code ends with a backslash-newline;
#   C_STRING_FIRST_LINE  - inside an unclosed string, on its first line;
#   C_STRING_NEXT_LINES  - inside an unclosed string, past its first line;
#   C_BRACKET            - inside an unclosed bracket structure.
(C_NONE, C_BACKSLASH, C_STRING_FIRST_LINE,
 C_STRING_NEXT_LINES, C_BRACKET) = range(5)

Cheryl Sabellac84cf6c2018-02-21 22:48:36 -050019# Find what looks like the start of a popular statement.
David Scherer7aced172000-08-15 01:13:23 +000020
21_synchre = re.compile(r"""
22 ^
23 [ \t]*
Kurt B. Kaiserb1754452005-11-18 22:05:48 +000024 (?: while
David Scherer7aced172000-08-15 01:13:23 +000025 | else
26 | def
27 | return
28 | assert
29 | break
30 | class
31 | continue
32 | elif
33 | try
34 | except
35 | raise
36 | import
Kurt B. Kaiser752e4d52001-07-14 04:59:24 +000037 | yield
David Scherer7aced172000-08-15 01:13:23 +000038 )
39 \b
40""", re.VERBOSE | re.MULTILINE).search
41
42# Match blank line or non-indenting comment line.
43
44_junkre = re.compile(r"""
45 [ \t]*
46 (?: \# \S .* )?
47 \n
48""", re.VERBOSE).match
49
50# Match any flavor of string; the terminating quote is optional
51# so that we're robust in the face of incomplete program text.
52
53_match_stringre = re.compile(r"""
54 \""" [^"\\]* (?:
55 (?: \\. | "(?!"") )
56 [^"\\]*
57 )*
58 (?: \""" )?
59
60| " [^"\\\n]* (?: \\. [^"\\\n]* )* "?
61
62| ''' [^'\\]* (?:
63 (?: \\. | '(?!'') )
64 [^'\\]*
65 )*
66 (?: ''' )?
67
68| ' [^'\\\n]* (?: \\. [^'\\\n]* )* '?
69""", re.VERBOSE | re.DOTALL).match
70
71# Match a line that starts with something interesting;
72# used to find the first item of a bracket structure.
73
74_itemre = re.compile(r"""
75 [ \t]*
76 [^\s#\\] # if we match, m.end()-1 is the interesting char
77""", re.VERBOSE).match
78
Cheryl Sabellac84cf6c2018-02-21 22:48:36 -050079# Match start of statements that should be followed by a dedent.
David Scherer7aced172000-08-15 01:13:23 +000080
81_closere = re.compile(r"""
82 \s*
83 (?: return
84 | break
85 | continue
86 | raise
87 | pass
88 )
89 \b
90""", re.VERBOSE).match
91
92# Chew up non-special chars as quickly as possible. If match is
93# successful, m.end() less 1 is the index of the last boring char
94# matched. If match is unsuccessful, the string starts with an
95# interesting char.
96
97_chew_ordinaryre = re.compile(r"""
98 [^[\](){}#'"\\]+
99""", re.VERBOSE).match
100
David Scherer7aced172000-08-15 01:13:23 +0000101
class ParseMap(dict):
    r"""Dict subclass that maps anything not in dict to 'x'.

    This is designed to be used with str.translate in study1.
    Anything not specifically mapped otherwise becomes 'x'.
    Example: replace everything except whitespace with 'x'.

    >>> keepwhite = ParseMap((ord(c), ord(c)) for c in ' \t\n\r')
    >>> "a + b\tc\nd".translate(keepwhite)
    'x x x\tx\nx'
    """
    # Calling this triples access time; see bpo-32940
    def __missing__(self, key):
        """Return the code point of 'x' for any key not in the dict."""
        return 120  # ord('x')


# Translation table used by Parser._study1 via str.translate.
# Map all ascii to 120 to avoid __missing__ call, then replace some.
trans = ParseMap.fromkeys(range(128), 120)
trans.update((ord(c), ord('(')) for c in "({[")  # open brackets => '(';
trans.update((ord(c), ord(')')) for c in ")}]")  # close brackets => ')'.
trans.update((ord(c), ord(c)) for c in "\"'\\\n#")  # Keep these.


class Parser:
    """Partial parser for the tail end of a chunk of Python source.

    Give it text with set_code(), optionally trim the text with
    find_good_parse_start() and set_lo(), then query the last statement:
    its continuation type, suggested indentation, bracketing structure,
    and whether it opens or closes a block.

    Analysis is done lazily in two passes, _study1() and _study2(); each
    runs at most once per set_code() call (tracked by self.study_level).
    """

    def __init__(self, indentwidth, tabwidth):
        """Remember the indentation settings.

        indentwidth: extra columns added by compute_bracket_indent()
            when nothing follows the open bracket.
        tabwidth: tab size passed to str.expandtabs when measuring
            indentation.
        """
        self.indentwidth = indentwidth
        self.tabwidth = tabwidth

    def set_code(self, s):
        """Set the text to analyze and discard earlier study results.

        s must be empty or end with a newline (checked with an assert).
        """
        assert len(s) == 0 or s[-1] == '\n'
        self.code = s
        self.study_level = 0

    def find_good_parse_start(self, is_char_in_string=None,
                              _synchre=_synchre):
        """
        Return index of a good place to begin parsing, as close to the
        end of the string as possible.  This will be the start of some
        popular stmt like "while" or "def".  Return None if none found:
        the caller should pass more prior context then, if possible, or
        if not (the entire program text up until the point of interest
        has already been tried) pass 0 to set_lo().

        This will be reliable iff given a reliable is_char_in_string()
        function, meaning that when it says "no", it's absolutely
        guaranteed that the char is not in a string.

        If is_char_in_string is not supplied, None is returned.
        """
        code, pos = self.code, None

        if not is_char_in_string:
            # no clue -- make the caller pass everything
            return None

        # Peek back from the end for a good place to start,
        # but don't try too often; pos will be left None, or
        # bumped to a legitimate synch point.
        limit = len(code)
        for _ in range(5):
            i = code.rfind(":\n", 0, limit)
            if i < 0:
                break
            i = code.rfind('\n', 0, i) + 1  # start of colon line (-1+1=0)
            m = _synchre(code, i, limit)
            if m and not is_char_in_string(m.start()):
                pos = m.start()
                break
            limit = i
        if pos is None:
            # Nothing looks like a block-opener, or stuff does
            # but is_char_in_string keeps returning true; most likely
            # we're in or near a giant string, the colorizer hasn't
            # caught up enough to be helpful, or there simply *aren't*
            # any interesting stmts.  In any of these cases we're
            # going to have to parse the whole thing to be sure, so
            # give it one last try from the start, but stop wasting
            # time here regardless of the outcome.
            m = _synchre(code)
            if m and not is_char_in_string(m.start()):
                pos = m.start()
            return pos

        # Peeking back worked; look forward until _synchre no longer
        # matches.
        i = pos + 1
        while True:
            m = _synchre(code, i)
            if m:
                s, i = m.span()
                if not is_char_in_string(s):
                    pos = s
            else:
                break
        return pos

    def set_lo(self, lo):
        """ Throw away the start of the string.

        Intended to be called with the result of find_good_parse_start().
        lo must be 0 or the index just past a newline (checked with an
        assert).
        """
        assert lo == 0 or self.code[lo-1] == '\n'
        if lo > 0:
            self.code = self.code[lo:]

    def _study1(self):
        """Find the line numbers of non-continuation lines.

        As quickly as humanly possible <wink>, find the line numbers (0-
        based) of the non-continuation lines.
        Creates self.{goodlines, continuation}.
        """
        if self.study_level >= 1:
            return
        self.study_level = 1

        # Map all uninteresting characters to "x", all open brackets
        # to "(", all close brackets to ")", then collapse runs of
        # uninteresting characters.  This can cut the number of chars
        # by a factor of 10-40, and so greatly speed the following loop.
        code = self.code
        code = code.translate(trans)
        code = code.replace('xxxxxxxx', 'x')
        code = code.replace('xxxx', 'x')
        code = code.replace('xx', 'x')
        code = code.replace('xx', 'x')
        code = code.replace('\nx', '\n')
        # Replacing x\n with \n would be incorrect because
        # x may be preceded by a backslash.

        # March over the squashed version of the program, accumulating
        # the line numbers of non-continued stmts, and determining
        # whether & why the last stmt is a continuation.
        continuation = C_NONE
        level = lno = 0     # level is nesting level; lno is line number
        self.goodlines = goodlines = [0]
        push_good = goodlines.append
        i, n = 0, len(code)
        while i < n:
            ch = code[i]
            i = i+1

            # cases are checked in decreasing order of frequency
            if ch == 'x':
                continue

            if ch == '\n':
                lno = lno + 1
                if level == 0:
                    push_good(lno)
                    # else we're in an unclosed bracket structure
                continue

            if ch == '(':
                level = level + 1
                continue

            if ch == ')':
                if level:
                    level = level - 1
                    # else the program is invalid, but we can't complain
                continue

            if ch == '"' or ch == "'":
                # consume the string
                quote = ch
                if code[i-1:i+2] == quote * 3:
                    quote = quote * 3
                firstlno = lno
                w = len(quote) - 1
                i = i+w
                while i < n:
                    ch = code[i]
                    i = i+1

                    if ch == 'x':
                        continue

                    if code[i-1:i+w] == quote:
                        i = i+w
                        break

                    if ch == '\n':
                        lno = lno + 1
                        if w == 0:
                            # unterminated single-quoted string
                            if level == 0:
                                push_good(lno)
                            break
                        continue

                    if ch == '\\':
                        assert i < n
                        if code[i] == '\n':
                            lno = lno + 1
                        i = i+1
                        continue

                    # else comment char or paren inside string

                else:
                    # didn't break out of the loop, so we're still
                    # inside a string
                    if (lno - 1) == firstlno:
                        # before the previous \n in code, we were in the first
                        # line of the string
                        continuation = C_STRING_FIRST_LINE
                    else:
                        continuation = C_STRING_NEXT_LINES
                continue    # with outer loop

            if ch == '#':
                # consume the comment
                i = code.find('\n', i)
                assert i >= 0
                continue

            assert ch == '\\'
            assert i < n
            if code[i] == '\n':
                lno = lno + 1
                if i+1 == n:
                    continuation = C_BACKSLASH
            i = i+1

        # The last stmt may be continued for all 3 reasons.
        # String continuation takes precedence over bracket
        # continuation, which beats backslash continuation.
        if (continuation != C_STRING_FIRST_LINE
                and continuation != C_STRING_NEXT_LINES and level > 0):
            continuation = C_BRACKET
        self.continuation = continuation

        # Push the final line number as a sentinel value, regardless of
        # whether it's continued.
        assert (continuation == C_NONE) == (goodlines[-1] == lno)
        if goodlines[-1] != lno:
            push_good(lno)

    def get_continuation_type(self):
        """Return the C_* constant saying why the last stmt is continued.

        The result is C_NONE if the last statement is not continued.
        """
        self._study1()
        return self.continuation

    def _study2(self):
        """
        study1 was sufficient to determine the continuation status,
        but doing more requires looking at every character.  study2
        does this for the last interesting statement in the block.
        Creates:
            self.stmt_start, stmt_end
                slice indices of last interesting stmt
            self.stmt_bracketing
                the bracketing structure of the last interesting stmt; for
                example, for the statement "say(boo) or die",
                stmt_bracketing will be ((0, 0), (0, 1), (2, 0), (2, 1),
                (4, 0)).  Strings and comments are treated as brackets, for
                that matter.
            self.lastch
                last interesting character before optional trailing comment
            self.lastopenbracketpos
                if continuation is C_BRACKET, index of last open bracket
        """
        if self.study_level >= 2:
            return
        self._study1()
        self.study_level = 2

        # Set p and q to slice indices of last interesting stmt.
        code, goodlines = self.code, self.goodlines
        i = len(goodlines) - 1  # Index of newest line.
        p = len(code)  # End of goodlines[i]
        while i:
            assert p
            # Make p be the index of the stmt at line number goodlines[i].
            # Move p back to the stmt at line number goodlines[i-1].
            q = p
            for _ in range(goodlines[i-1], goodlines[i]):
                # tricky: sets p to 0 if no preceding newline
                p = code.rfind('\n', 0, p-1) + 1
            # The stmt code[p:q] isn't a continuation, but may be blank
            # or a non-indenting comment line.
            if _junkre(code, p):
                i = i-1
            else:
                break
        if i == 0:
            # nothing but junk!
            assert p == 0
            q = p
        self.stmt_start, self.stmt_end = p, q

        # Analyze this stmt, to find the last open bracket (if any)
        # and last interesting character (if any).
        lastch = ""
        stack = []  # stack of open bracket indices
        push_stack = stack.append
        bracketing = [(p, 0)]
        while p < q:
            # suck up all except ()[]{}'"#\\
            m = _chew_ordinaryre(code, p, q)
            if m:
                # we skipped at least one boring char
                newp = m.end()
                # back up over totally boring whitespace
                i = newp - 1    # index of last boring char
                while i >= p and code[i] in " \t\n":
                    i = i-1
                if i >= p:
                    lastch = code[i]
                p = newp
                if p >= q:
                    break

            ch = code[p]

            if ch in "([{":
                push_stack(p)
                bracketing.append((p, len(stack)))
                lastch = ch
                p = p+1
                continue

            if ch in ")]}":
                if stack:
                    del stack[-1]
                lastch = ch
                p = p+1
                bracketing.append((p, len(stack)))
                continue

            if ch == '"' or ch == "'":
                # consume string
                # Note that study1 did this with a Python loop, but
                # we use a regexp here; the reason is speed in both
                # cases; the string may be huge, but study1 pre-squashed
                # strings to a couple of characters per line.  study1
                # also needed to keep track of newlines, and we don't
                # have to.
                bracketing.append((p, len(stack)+1))
                lastch = ch
                p = _match_stringre(code, p, q).end()
                bracketing.append((p, len(stack)))
                continue

            if ch == '#':
                # consume comment and trailing newline
                bracketing.append((p, len(stack)+1))
                p = code.find('\n', p, q) + 1
                assert p > 0
                bracketing.append((p, len(stack)))
                continue

            assert ch == '\\'
            p = p+1     # beyond backslash
            assert p < q
            if code[p] != '\n':
                # the program is invalid, but can't complain
                lastch = ch + code[p]
            p = p+1     # beyond escaped char

        # end while p < q:

        self.lastch = lastch
        self.lastopenbracketpos = stack[-1] if stack else None
        self.stmt_bracketing = tuple(bracketing)

    def compute_bracket_indent(self):
        """Return number of spaces the next line should be indented.

        Line continuation must be C_BRACKET.
        """
        self._study2()
        assert self.continuation == C_BRACKET
        j = self.lastopenbracketpos
        code = self.code
        n = len(code)
        origi = i = code.rfind('\n', 0, j) + 1
        j = j+1     # one beyond open bracket
        # find first list item; set i to start of its line
        while j < n:
            m = _itemre(code, j)
            if m:
                j = m.end() - 1     # index of first interesting char
                extra = 0
                break
            else:
                # this line is junk; advance to next line
                i = j = code.find('\n', j) + 1
        else:
            # nothing interesting follows the bracket;
            # reproduce the bracket line's indentation + a level
            j = i = origi
            while code[j] in " \t":
                j = j+1
            extra = self.indentwidth
        return len(code[i:j].expandtabs(self.tabwidth)) + extra

    def get_num_lines_in_stmt(self):
        """Return number of physical lines in last stmt.

        The statement doesn't have to be an interesting statement.  This is
        intended to be called when continuation is C_BACKSLASH.
        """
        self._study1()
        goodlines = self.goodlines
        return goodlines[-1] - goodlines[-2]

    def compute_backslash_indent(self):
        """Return number of spaces the next line should be indented.

        Line continuation must be C_BACKSLASH.  Also assume that the new
        line is the first one following the initial line of the stmt.
        """
        self._study2()
        assert self.continuation == C_BACKSLASH
        code = self.code
        i = self.stmt_start
        while code[i] in " \t":
            i = i+1
        startpos = i

        # See whether the initial line starts an assignment stmt; i.e.,
        # look for an = operator
        endpos = code.find('\n', startpos) + 1
        found = level = 0
        while i < endpos:
            ch = code[i]
            if ch in "([{":
                level = level + 1
                i = i+1
            elif ch in ")]}":
                if level:
                    level = level - 1
                i = i+1
            elif ch == '"' or ch == "'":
                i = _match_stringre(code, i, endpos).end()
            elif ch == '#':
                # This line is unreachable because the # makes a comment of
                # everything after it.
                break
            elif (level == 0 and ch == '='
                    and (i == 0 or code[i-1] not in "=<>!")
                    and code[i+1] != '='):
                found = 1
                break
            else:
                i = i+1

        if found:
            # found a legit =, but it may be the last interesting
            # thing on the line
            i = i+1     # move beyond the =
            found = re.match(r"\s*\\", code[i:endpos]) is None

        if not found:
            # oh well ... settle for moving beyond the first chunk
            # of non-whitespace chars
            i = startpos
            while code[i] not in " \t\n":
                i = i+1

        return len(code[self.stmt_start:i].expandtabs(self.tabwidth)) + 1

    def get_base_indent_string(self):
        """Return the leading whitespace on the initial line of the last
        interesting stmt.
        """
        self._study2()
        i, n = self.stmt_start, self.stmt_end
        j = i
        code = self.code
        while j < n and code[j] in " \t":
            j = j + 1
        return code[i:j]

    def is_block_opener(self):
        "Return True if the last interesting statement opens a block."
        self._study2()
        return self.lastch == ':'

    def is_block_closer(self):
        "Return True if the last interesting statement closes a block."
        self._study2()
        return _closere(self.code, self.stmt_start) is not None

    def get_last_stmt_bracketing(self):
        """Return bracketing structure of the last interesting statement.

        The returned tuple is in the format defined in _study2().
        """
        self._study2()
        return self.stmt_bracketing


# When run as a script, run this module's unit tests.
if __name__ == '__main__':
    from unittest import main
    main('idlelib.idle_test.test_pyparse', verbosity=2)