"""Define partial Python code Parser used by editor and hyperparser.

Instances of ParseMap are used with str.translate.

The following bound search and match functions are defined:
_synchre - start of popular statement;
_junkre - whitespace or comment line;
_match_stringre - string, possibly without closer;
_itemre - line that may have bracket structure start;
_closere - line that must be followed by dedent;
_chew_ordinaryre - non-special characters.
"""
import re

# Reason last statement is continued (or C_NONE if it's not).
(C_NONE, C_BACKSLASH, C_STRING_FIRST_LINE,
 C_STRING_NEXT_LINES, C_BRACKET) = range(5)

# Find what looks like the start of a popular statement.
# Bound to .search with re.MULTILINE, so '^' matches at every line start.

_synchre = re.compile(r"""
    ^
    [ \t]*
    (?: while
    |   else
    |   def
    |   return
    |   assert
    |   break
    |   class
    |   continue
    |   elif
    |   try
    |   except
    |   raise
    |   import
    |   yield
    )
    \b
""", re.VERBOSE | re.MULTILINE).search

# Match blank line or non-indenting comment line (a comment whose '#'
# is immediately followed by a non-whitespace character).

_junkre = re.compile(r"""
    [ \t]*
    (?: \# \S .* )?
    \n
""", re.VERBOSE).match

# Match any flavor of string; the terminating quote is optional
# so that we're robust in the face of incomplete program text.

_match_stringre = re.compile(r"""
    \""" [^"\\]* (?:
                     (?: \\. | "(?!"") )
                     [^"\\]*
                 )*
    (?: \""" )?

|   " [^"\\\n]* (?: \\. [^"\\\n]* )* "?

|   ''' [^'\\]* (?:
                   (?: \\. | '(?!'') )
                   [^'\\]*
                )*
    (?: ''' )?

|   ' [^'\\\n]* (?: \\. [^'\\\n]* )* '?
""", re.VERBOSE | re.DOTALL).match

# Match a line that starts with something interesting;
# used to find the first item of a bracket structure.

_itemre = re.compile(r"""
    [ \t]*
    [^\s#\\]    # if we match, m.end()-1 is the interesting char
""", re.VERBOSE).match

# Match start of statements that should be followed by a dedent.

_closere = re.compile(r"""
    \s*
    (?: return
    |   break
    |   continue
    |   raise
    |   pass
    )
    \b
""", re.VERBOSE).match

# Chew up non-special chars as quickly as possible.  If match is
# successful, m.end() less 1 is the index of the last boring char
# matched.  If match is unsuccessful, the string starts with an
# interesting char.

_chew_ordinaryre = re.compile(r"""
    [^[\](){}#'"\\]+
""", re.VERBOSE).match


class ParseMap(dict):
    r"""Dict subclass that maps anything not in dict to 'x'.

    This is designed to be used with str.translate in study1.
    Anything not specifically mapped otherwise becomes 'x'.
    Example: replace everything except whitespace with 'x'.

    >>> keepwhite = ParseMap((ord(c), ord(c)) for c in ' \t\n\r')
    >>> "a + b\tc\nd".translate(keepwhite)
    'x x x\tx\nx'
    """

    # Calling this triples access time; see bpo-32940
    def __missing__(self, key):
        """Return ord('x') for any key that has no explicit mapping.

        The key is not inserted into the dict, so every lookup of an
        unmapped key goes through this method again.
        """
        return 120  # ord('x')


# Translation table used by Parser._study1 via str.translate.
# Map all ascii to 120 to avoid __missing__ call, then replace some.
trans = ParseMap.fromkeys(range(128), 120)
trans.update((ord(c), ord('(')) for c in "({[")  # open brackets => '(';
trans.update((ord(c), ord(')')) for c in ")}]")  # close brackets => ')'.
trans.update((ord(c), ord(c)) for c in "\"'\\\n#")  # Keep these.


class Parser:
    """Partial parser for a chunk of Python source text.

    Code is supplied with set_code() and may be trimmed with set_lo().
    The query methods analyze it lazily: _study1() and _study2() run at
    most once per set_code() call, tracked by self.study_level.  Note
    that set_lo() does not reset study_level, so it has to be called
    before any query method for the trimmed text to be analyzed.
    """

    def __init__(self, indentwidth, tabwidth):
        """Store the indent width and tab width used by the indent methods.

        indentwidth is the extra indent added by compute_bracket_indent()
        when nothing follows the open bracket; tabwidth is passed to
        str.expandtabs when measuring leading whitespace.
        """
        self.indentwidth = indentwidth
        self.tabwidth = tabwidth

    def set_code(self, s):
        """Set the text to analyze and discard any previous analysis.

        s must be empty or end with a newline.
        """
        assert len(s) == 0 or s[-1] == '\n'
        self.code = s
        self.study_level = 0

    def find_good_parse_start(self, is_char_in_string):
        """
        Return index of a good place to begin parsing, as close to the
        end of the string as possible.  This will be the start of some
        popular stmt like "while" or "def" (see _synchre).  Return None
        if none found:  the caller should pass more prior context then,
        if possible, or if not (the entire program text up until the
        point of interest has already been tried) pass 0 to set_lo().

        is_char_in_string is called with an index into the code and
        returns true if the char at that index is in a string.

        This will be reliable iff given a reliable is_char_in_string()
        function, meaning that when it says "no", it's absolutely
        guaranteed that the char is not in a string.
        """
        code, pos = self.code, None

        # Peek back from the end for a good place to start,
        # but don't try too often; pos will be left None, or
        # bumped to a legitimate synch point.
        limit = len(code)
        for _ in range(5):
            i = code.rfind(":\n", 0, limit)
            if i < 0:
                break
            i = code.rfind('\n', 0, i) + 1  # start of colon line (-1+1=0)
            m = _synchre(code, i, limit)
            if m and not is_char_in_string(m.start()):
                pos = m.start()
                break
            limit = i
        if pos is None:
            # Nothing looks like a block-opener, or stuff does
            # but is_char_in_string keeps returning true; most likely
            # we're in or near a giant string, the colorizer hasn't
            # caught up enough to be helpful, or there simply *aren't*
            # any interesting stmts.  In any of these cases we're
            # going to have to parse the whole thing to be sure, so
            # give it one last try from the start, but stop wasting
            # time here regardless of the outcome.
            m = _synchre(code)
            if m and not is_char_in_string(m.start()):
                pos = m.start()
            return pos

        # Peeking back worked; look forward until _synchre no longer
        # matches.
        i = pos + 1
        while True:
            m = _synchre(code, i)
            if not m:
                break
            s, i = m.span()
            if not is_char_in_string(s):
                pos = s
        return pos

    def set_lo(self, lo):
        """Throw away the start of the string.

        Intended to be called with the result of find_good_parse_start().
        lo must be 0 or the index just after a newline.
        """
        assert lo == 0 or self.code[lo-1] == '\n'
        if lo > 0:
            self.code = self.code[lo:]

    def _study1(self):
        """Find the line numbers of non-continuation lines.

        As quickly as humanly possible <wink>, find the line numbers (0-
        based) of the non-continuation lines.
        Creates self.{goodlines, continuation}.
        """
        if self.study_level >= 1:
            return
        self.study_level = 1

        # Map all uninteresting characters to "x", all open brackets
        # to "(", all close brackets to ")", then collapse runs of
        # uninteresting characters.  This can cut the number of chars
        # by a factor of 10-40, and so greatly speed the following loop.
        code = self.code
        code = code.translate(trans)
        code = code.replace('xxxxxxxx', 'x')
        code = code.replace('xxxx', 'x')
        code = code.replace('xx', 'x')
        code = code.replace('xx', 'x')
        code = code.replace('\nx', '\n')
        # Replacing x\n with \n would be incorrect because
        # x may be preceded by a backslash.

        # March over the squashed version of the program, accumulating
        # the line numbers of non-continued stmts, and determining
        # whether & why the last stmt is a continuation.
        continuation = C_NONE
        level = lno = 0     # level is nesting level; lno is line number
        self.goodlines = goodlines = [0]
        push_good = goodlines.append
        i, n = 0, len(code)
        while i < n:
            ch = code[i]
            i += 1

            # cases are checked in decreasing order of frequency
            if ch == 'x':
                continue

            if ch == '\n':
                lno += 1
                if level == 0:
                    push_good(lno)
                    # else we're in an unclosed bracket structure
                continue

            if ch == '(':
                level += 1
                continue

            if ch == ')':
                if level:
                    level -= 1
                    # else the program is invalid, but we can't complain
                continue

            if ch == '"' or ch == "'":
                # consume the string
                quote = ch
                if code[i-1:i+2] == quote * 3:
                    quote = quote * 3
                firstlno = lno
                w = len(quote) - 1  # extra quote chars beyond the first
                i += w
                while i < n:
                    ch = code[i]
                    i += 1

                    if ch == 'x':
                        continue

                    if code[i-1:i+w] == quote:
                        i += w
                        break

                    if ch == '\n':
                        lno += 1
                        if w == 0:
                            # unterminated single-quoted string
                            if level == 0:
                                push_good(lno)
                            break
                        continue

                    if ch == '\\':
                        assert i < n
                        if code[i] == '\n':
                            lno += 1
                        i += 1
                        continue

                    # else comment char or paren inside string

                else:
                    # didn't break out of the loop, so we're still
                    # inside a string
                    if (lno - 1) == firstlno:
                        # before the previous \n in code, we were in the
                        # first line of the string
                        continuation = C_STRING_FIRST_LINE
                    else:
                        continuation = C_STRING_NEXT_LINES
                continue    # with outer loop

            if ch == '#':
                # consume the comment
                i = code.find('\n', i)
                assert i >= 0
                continue

            assert ch == '\\'
            assert i < n
            if code[i] == '\n':
                lno += 1
                if i+1 == n:
                    continuation = C_BACKSLASH
            i += 1

        # The last stmt may be continued for all 3 reasons.
        # String continuation takes precedence over bracket
        # continuation, which beats backslash continuation.
        if (continuation != C_STRING_FIRST_LINE
                and continuation != C_STRING_NEXT_LINES and level > 0):
            continuation = C_BRACKET
        self.continuation = continuation

        # Push the final line number as a sentinel value, regardless of
        # whether it's continued.
        assert (continuation == C_NONE) == (goodlines[-1] == lno)
        if goodlines[-1] != lno:
            push_good(lno)

    def get_continuation_type(self):
        """Return the reason the last statement is continued.

        The result is one of the module-level C_* constants; C_NONE
        means the last statement is not continued.
        """
        self._study1()
        return self.continuation

    def _study2(self):
        """
        study1 was sufficient to determine the continuation status,
        but doing more requires looking at every character.  study2
        does this for the last interesting statement in the block.
        Creates:
            self.stmt_start, stmt_end
                slice indices of last interesting stmt
            self.stmt_bracketing
                the bracketing structure of the last interesting stmt,
                as a tuple of (index, nesting level) pairs, each giving
                the level that applies from that index on; for example,
                for the statement "say(boo) or die" starting at index
                0, stmt_bracketing will be ((0, 0), (3, 1), (8, 0)).
                Strings and comments are treated as brackets, for
                the matter.
            self.lastch
                last interesting character before optional trailing comment
            self.lastopenbracketpos
                if continuation is C_BRACKET, index of last open bracket
        """
        if self.study_level >= 2:
            return
        self._study1()
        self.study_level = 2

        # Set p and q to slice indices of last interesting stmt.
        code, goodlines = self.code, self.goodlines
        i = len(goodlines) - 1  # Index of newest line.
        p = len(code)  # End of goodlines[i]
        while i:
            assert p
            # Make p be the index of the stmt at line number goodlines[i].
            # Move p back to the stmt at line number goodlines[i-1].
            q = p
            for _ in range(goodlines[i-1], goodlines[i]):
                # tricky: sets p to 0 if no preceding newline
                p = code.rfind('\n', 0, p-1) + 1
            # The stmt code[p:q] isn't a continuation, but may be blank
            # or a non-indenting comment line.
            if _junkre(code, p):
                i -= 1
            else:
                break
        if i == 0:
            # nothing but junk!
            assert p == 0
            q = p
        self.stmt_start, self.stmt_end = p, q

        # Analyze this stmt, to find the last open bracket (if any)
        # and last interesting character (if any).
        lastch = ""
        stack = []  # stack of open bracket indices
        push_stack = stack.append
        bracketing = [(p, 0)]
        while p < q:
            # suck up all except ()[]{}'"#\\
            m = _chew_ordinaryre(code, p, q)
            if m:
                # we skipped at least one boring char
                newp = m.end()
                # back up over totally boring whitespace
                i = newp - 1    # index of last boring char
                while i >= p and code[i] in " \t\n":
                    i -= 1
                if i >= p:
                    lastch = code[i]
                p = newp
                if p >= q:
                    break

            ch = code[p]

            if ch in "([{":
                push_stack(p)
                bracketing.append((p, len(stack)))
                lastch = ch
                p += 1
                continue

            if ch in ")]}":
                if stack:
                    del stack[-1]
                lastch = ch
                p += 1
                bracketing.append((p, len(stack)))
                continue

            if ch == '"' or ch == "'":
                # consume string
                # Note that study1 did this with a Python loop, but
                # we use a regexp here; the reason is speed in both
                # cases; the string may be huge, but study1 pre-squashed
                # strings to a couple of characters per line.  study1
                # also needed to keep track of newlines, and we don't
                # have to.
                bracketing.append((p, len(stack)+1))
                lastch = ch
                p = _match_stringre(code, p, q).end()
                bracketing.append((p, len(stack)))
                continue

            if ch == '#':
                # consume comment and trailing newline
                bracketing.append((p, len(stack)+1))
                p = code.find('\n', p, q) + 1
                assert p > 0
                bracketing.append((p, len(stack)))
                continue

            assert ch == '\\'
            p += 1  # beyond backslash
            assert p < q
            if code[p] != '\n':
                # the program is invalid, but can't complain
                lastch = ch + code[p]
            p += 1  # beyond escaped char

        # end while p < q:

        self.lastch = lastch
        self.lastopenbracketpos = stack[-1] if stack else None
        self.stmt_bracketing = tuple(bracketing)

    def compute_bracket_indent(self):
        """Return number of spaces the next line should be indented.

        Line continuation must be C_BRACKET.
        """
        self._study2()
        assert self.continuation == C_BRACKET
        j = self.lastopenbracketpos
        code = self.code
        n = len(code)
        origi = i = code.rfind('\n', 0, j) + 1
        j += 1  # one beyond open bracket
        # find first list item; set i to start of its line
        while j < n:
            m = _itemre(code, j)
            if m:
                j = m.end() - 1     # index of first interesting char
                extra = 0
                break
            else:
                # this line is junk; advance to next line
                i = j = code.find('\n', j) + 1
        else:
            # nothing interesting follows the bracket;
            # reproduce the bracket line's indentation + a level
            j = i = origi
            while code[j] in " \t":
                j += 1
            extra = self.indentwidth
        return len(code[i:j].expandtabs(self.tabwidth)) + extra

    def get_num_lines_in_stmt(self):
        """Return number of physical lines in last stmt.

        The statement doesn't have to be an interesting statement.  This is
        intended to be called when continuation is C_BACKSLASH.
        """
        self._study1()
        goodlines = self.goodlines
        return goodlines[-1] - goodlines[-2]

    def compute_backslash_indent(self):
        """Return number of spaces the next line should be indented.

        Line continuation must be C_BACKSLASH.  Also assume that the new
        line is the first one following the initial line of the stmt.
        """
        self._study2()
        assert self.continuation == C_BACKSLASH
        code = self.code
        i = self.stmt_start
        while code[i] in " \t":
            i += 1
        startpos = i

        # See whether the initial line starts an assignment stmt; i.e.,
        # look for an = operator
        endpos = code.find('\n', startpos) + 1
        found = level = 0
        while i < endpos:
            ch = code[i]
            if ch in "([{":
                level += 1
                i += 1
            elif ch in ")]}":
                if level:
                    level -= 1
                i += 1
            elif ch == '"' or ch == "'":
                i = _match_stringre(code, i, endpos).end()
            elif ch == '#':
                # This line is unreachable because the # makes a comment of
                # everything after it.
                break
            elif (level == 0 and ch == '='
                    and (i == 0 or code[i-1] not in "=<>!")
                    and code[i+1] != '='):
                found = 1
                break
            else:
                i += 1

        if found:
            # found a legit =, but it may be the last interesting
            # thing on the line
            i += 1  # move beyond the =
            found = re.match(r"\s*\\", code[i:endpos]) is None

        if not found:
            # oh well ... settle for moving beyond the first chunk
            # of non-whitespace chars
            i = startpos
            while code[i] not in " \t\n":
                i += 1

        return len(code[self.stmt_start:i].expandtabs(self.tabwidth)) + 1

    def get_base_indent_string(self):
        """Return the leading whitespace on the initial line of the last
        interesting stmt.
        """
        self._study2()
        i, n = self.stmt_start, self.stmt_end
        j = i
        code = self.code
        while j < n and code[j] in " \t":
            j += 1
        return code[i:j]

    def is_block_opener(self):
        "Return True if the last interesting statement opens a block."
        self._study2()
        return self.lastch == ':'

    def is_block_closer(self):
        "Return True if the last interesting statement closes a block."
        self._study2()
        return _closere(self.code, self.stmt_start) is not None

    def get_last_stmt_bracketing(self):
        """Return bracketing structure of the last interesting statement.

        The returned tuple is in the format defined in _study2().
        """
        self._study2()
        return self.stmt_bracketing


if __name__ == '__main__':
    # Run this module's unit tests when executed as a script.
    from unittest import main
    main('idlelib.idle_test.test_pyparse', verbosity=2)