blob: 1eeb9154d9061049155b5879246608b2b54fd6e7 [file] [log] [blame]
"""Define partial Python code Parser used by editor and hyperparser.

Instances of ParseMap are used with str.translate.

The following bound search and match functions are defined:
_synchre - start of popular statement;
_junkre - whitespace or comment line;
_match_stringre - string, possibly without closer;
_itemre - line that may have bracket structure start;
_closere - line that must be followed by dedent;
_chew_ordinaryre - non-special characters.
"""
import re
import sys
15
# Reason last statement is continued (or C_NONE if it's not).
# The five names are bound to the distinct ints 0..4, in this order.
(C_NONE, C_BACKSLASH, C_STRING_FIRST_LINE,
 C_STRING_NEXT_LINES, C_BRACKET) = range(5)
David Scherer7aced172000-08-15 01:13:23 +000019
Miss Islington (bot)c59bc982018-02-21 20:09:39 -080020# Find what looks like the start of a popular statement.
David Scherer7aced172000-08-15 01:13:23 +000021
22_synchre = re.compile(r"""
23 ^
24 [ \t]*
Kurt B. Kaiserb1754452005-11-18 22:05:48 +000025 (?: while
David Scherer7aced172000-08-15 01:13:23 +000026 | else
27 | def
28 | return
29 | assert
30 | break
31 | class
32 | continue
33 | elif
34 | try
35 | except
36 | raise
37 | import
Kurt B. Kaiser752e4d52001-07-14 04:59:24 +000038 | yield
David Scherer7aced172000-08-15 01:13:23 +000039 )
40 \b
41""", re.VERBOSE | re.MULTILINE).search
42
43# Match blank line or non-indenting comment line.
44
45_junkre = re.compile(r"""
46 [ \t]*
47 (?: \# \S .* )?
48 \n
49""", re.VERBOSE).match
50
51# Match any flavor of string; the terminating quote is optional
52# so that we're robust in the face of incomplete program text.
53
54_match_stringre = re.compile(r"""
55 \""" [^"\\]* (?:
56 (?: \\. | "(?!"") )
57 [^"\\]*
58 )*
59 (?: \""" )?
60
61| " [^"\\\n]* (?: \\. [^"\\\n]* )* "?
62
63| ''' [^'\\]* (?:
64 (?: \\. | '(?!'') )
65 [^'\\]*
66 )*
67 (?: ''' )?
68
69| ' [^'\\\n]* (?: \\. [^'\\\n]* )* '?
70""", re.VERBOSE | re.DOTALL).match
71
72# Match a line that starts with something interesting;
73# used to find the first item of a bracket structure.
74
75_itemre = re.compile(r"""
76 [ \t]*
77 [^\s#\\] # if we match, m.end()-1 is the interesting char
78""", re.VERBOSE).match
79
Miss Islington (bot)c59bc982018-02-21 20:09:39 -080080# Match start of statements that should be followed by a dedent.
David Scherer7aced172000-08-15 01:13:23 +000081
82_closere = re.compile(r"""
83 \s*
84 (?: return
85 | break
86 | continue
87 | raise
88 | pass
89 )
90 \b
91""", re.VERBOSE).match
92
93# Chew up non-special chars as quickly as possible. If match is
94# successful, m.end() less 1 is the index of the last boring char
95# matched. If match is unsuccessful, the string starts with an
96# interesting char.
97
98_chew_ordinaryre = re.compile(r"""
99 [^[\](){}#'"\\]+
100""", re.VERBOSE).match
101
David Scherer7aced172000-08-15 01:13:23 +0000102
class ParseMap(dict):
    r"""Dict subclass that maps anything not in dict to 'x'.

    This is designed to be used with str.translate in study1.
    Anything not specifically mapped otherwise becomes 'x'.
    Example: replace everything except whitespace with 'x'.

    >>> keepwhite = ParseMap((ord(c), ord(c)) for c in ' \t\n\r')
    >>> "a + b\tc\nd".translate(keepwhite)
    'x x x\tx\nx'
    """
    # Calling this triples access time; see bpo-32940
    def __missing__(self, key):
        """Return ord('x') for any key that has no explicit mapping."""
        return 120  # ord('x')
Tal Einat9b7f9e62014-07-16 16:33:36 +0300117
Tal Einat9b7f9e62014-07-16 16:33:36 +0300118
# Map all ascii to 120 to avoid __missing__ call, then replace some.
# Non-ascii code points are absent and fall through to __missing__,
# which also yields 120 (ord('x')).
trans = ParseMap.fromkeys(range(128), 120)
trans.update((ord(c), ord('(')) for c in "({[")  # open brackets => '(';
trans.update((ord(c), ord(')')) for c in ")}]")  # close brackets => ')'.
trans.update((ord(c), ord(c)) for c in "\"'\\\n#")  # Keep these.
Tal Einat9b7f9e62014-07-16 16:33:36 +0300124
David Scherer7aced172000-08-15 01:13:23 +0000125
class Parser:
    """Partial parser of Python code, used to decide indentation.

    Typical use: construct with the indent and tab widths, hand over the
    text with set_code(), optionally trim it with set_lo() using the
    result of find_good_parse_start(), then query the continuation type,
    indentation and bracketing of the last statement.

    Analysis is lazy and done in two stages (_study1, _study2); the
    attribute study_level records how far it has progressed and is
    reset to 0 by set_code().
    """

    def __init__(self, indentwidth, tabwidth):
        """Store the indent width and tab width used by the computations."""
        self.indentwidth = indentwidth
        self.tabwidth = tabwidth

    def set_code(self, s):
        """Set the text to analyze and discard any earlier analysis.

        s must be empty or end with a newline (checked with assert).
        """
        assert len(s) == 0 or s[-1] == '\n'
        self.code = s
        self.study_level = 0

    def find_good_parse_start(self, is_char_in_string=None,
                              _synchre=_synchre):
        """
        Return index of a good place to begin parsing, as close to the
        end of the string as possible.  This will be the start of some
        popular stmt like "while" or "def".  Return None if none found:
        the caller should pass more prior context then, if possible, or
        if not (the entire program text up until the point of interest
        has already been tried) pass 0 to set_lo().

        This will be reliable iff given a reliable is_char_in_string()
        function, meaning that when it says "no", it's absolutely
        guaranteed that the char is not in a string.

        The _synchre parameter only binds the module-level search
        function as a local name; callers are not expected to pass it.
        """
        code, pos = self.code, None

        if not is_char_in_string:
            # no clue -- make the caller pass everything
            return None

        # Peek back from the end for a good place to start,
        # but don't try too often; pos will be left None, or
        # bumped to a legitimate synch point.
        limit = len(code)
        for tries in range(5):
            i = code.rfind(":\n", 0, limit)
            if i < 0:
                break
            i = code.rfind('\n', 0, i) + 1  # start of colon line (-1+1=0)
            m = _synchre(code, i, limit)
            if m and not is_char_in_string(m.start()):
                pos = m.start()
                break
            limit = i
        if pos is None:
            # Nothing looks like a block-opener, or stuff does
            # but is_char_in_string keeps returning true; most likely
            # we're in or near a giant string, the colorizer hasn't
            # caught up enough to be helpful, or there simply *aren't*
            # any interesting stmts.  In any of these cases we're
            # going to have to parse the whole thing to be sure, so
            # give it one last try from the start, but stop wasting
            # time here regardless of the outcome.
            m = _synchre(code)
            if m and not is_char_in_string(m.start()):
                pos = m.start()
            return pos

        # Peeking back worked; look forward until _synchre no longer
        # matches.
        i = pos + 1
        while True:
            m = _synchre(code, i)
            if m:
                s, i = m.span()
                if not is_char_in_string(s):
                    pos = s
            else:
                break
        return pos

    def set_lo(self, lo):
        """ Throw away the start of the string.

        Intended to be called with the result of find_good_parse_start().
        lo must be 0 or the index just after a newline (checked with
        assert).  Note that study_level is not reset here.
        """
        assert lo == 0 or self.code[lo-1] == '\n'
        if lo > 0:
            self.code = self.code[lo:]

    def _study1(self):
        """Find the line numbers of non-continuation lines.

        As quickly as humanly possible <wink>, find the line numbers (0-
        based) of the non-continuation lines.
        Creates self.{goodlines, continuation}.
        """
        if self.study_level >= 1:
            return
        self.study_level = 1

        # Map all uninteresting characters to "x", all open brackets
        # to "(", all close brackets to ")", then collapse runs of
        # uninteresting characters.  This can cut the number of chars
        # by a factor of 10-40, and so greatly speed the following loop.
        code = self.code
        code = code.translate(trans)
        code = code.replace('xxxxxxxx', 'x')
        code = code.replace('xxxx', 'x')
        code = code.replace('xx', 'x')
        code = code.replace('xx', 'x')
        code = code.replace('\nx', '\n')
        # Replacing x\n with \n would be incorrect because
        # x may be preceded by a backslash.

        # March over the squashed version of the program, accumulating
        # the line numbers of non-continued stmts, and determining
        # whether & why the last stmt is a continuation.
        continuation = C_NONE
        level = lno = 0     # level is nesting level; lno is line number
        self.goodlines = goodlines = [0]
        push_good = goodlines.append
        i, n = 0, len(code)
        while i < n:
            ch = code[i]
            i += 1

            # cases are checked in decreasing order of frequency
            if ch == 'x':
                continue

            if ch == '\n':
                lno += 1
                if level == 0:
                    push_good(lno)
                    # else we're in an unclosed bracket structure
                continue

            if ch == '(':
                level += 1
                continue

            if ch == ')':
                if level:
                    level -= 1
                    # else the program is invalid, but we can't complain
                continue

            if ch == '"' or ch == "'":
                # consume the string
                quote = ch
                if code[i-1:i+2] == quote * 3:
                    quote = quote * 3
                firstlno = lno
                w = len(quote) - 1  # extra quote chars beyond the first
                i += w
                while i < n:
                    ch = code[i]
                    i += 1

                    if ch == 'x':
                        continue

                    if code[i-1:i+w] == quote:
                        i += w
                        break

                    if ch == '\n':
                        lno += 1
                        if w == 0:
                            # unterminated single-quoted string
                            if level == 0:
                                push_good(lno)
                            break
                        continue

                    if ch == '\\':
                        assert i < n
                        if code[i] == '\n':
                            lno += 1
                        i += 1
                        continue

                    # else comment char or paren inside string

                else:
                    # didn't break out of the loop, so we're still
                    # inside a string
                    if (lno - 1) == firstlno:
                        # before the previous \n in code, we were in the
                        # first line of the string
                        continuation = C_STRING_FIRST_LINE
                    else:
                        continuation = C_STRING_NEXT_LINES
                continue    # with outer loop

            if ch == '#':
                # consume the comment; i is left on the newline so the
                # outer loop counts the line as usual
                i = code.find('\n', i)
                assert i >= 0
                continue

            assert ch == '\\'
            assert i < n
            if code[i] == '\n':
                lno += 1
                if i+1 == n:
                    continuation = C_BACKSLASH
            i += 1

        # The last stmt may be continued for all 3 reasons.
        # String continuation takes precedence over bracket
        # continuation, which beats backslash continuation.
        if (continuation != C_STRING_FIRST_LINE
                and continuation != C_STRING_NEXT_LINES and level > 0):
            continuation = C_BRACKET
        self.continuation = continuation

        # Push the final line number as a sentinel value, regardless of
        # whether it's continued.
        assert (continuation == C_NONE) == (goodlines[-1] == lno)
        if goodlines[-1] != lno:
            push_good(lno)

    def get_continuation_type(self):
        """Return the reason the last statement is continued.

        The result is one of the module-level C_* constants.
        """
        self._study1()
        return self.continuation

    def _study2(self):
        """
        study1 was sufficient to determine the continuation status,
        but doing more requires looking at every character.  study2
        does this for the last interesting statement in the block.
        Creates:
            self.stmt_start, stmt_end
                slice indices of last interesting stmt
            self.stmt_bracketing
                the bracketing structure of the last interesting stmt,
                as a tuple of (index, nesting depth) pairs recorded at
                the statement start and wherever the depth changes; for
                example, for the statement "say(boo) or die",
                stmt_bracketing will be ((0, 0), (3, 1), (8, 0)).
                Strings and comments are treated as brackets, for
                the matter.
            self.lastch
                last interesting character before optional trailing comment
            self.lastopenbracketpos
                index of last unclosed open bracket, or None if there
                is none (set when continuation is C_BRACKET)
        """
        if self.study_level >= 2:
            return
        self._study1()
        self.study_level = 2

        # Set p and q to slice indices of last interesting stmt.
        code, goodlines = self.code, self.goodlines
        i = len(goodlines) - 1  # Index of newest line.
        p = len(code)  # End of goodlines[i]
        while i:
            assert p
            # Make p be the index of the stmt at line number goodlines[i].
            # Move p back to the stmt at line number goodlines[i-1].
            q = p
            for _ in range(goodlines[i-1], goodlines[i]):
                # tricky: sets p to 0 if no preceding newline
                p = code.rfind('\n', 0, p-1) + 1
            # The stmt code[p:q] isn't a continuation, but may be blank
            # or a non-indenting comment line.
            if _junkre(code, p):
                i -= 1
            else:
                break
        if i == 0:
            # nothing but junk!
            assert p == 0
            q = p
        self.stmt_start, self.stmt_end = p, q

        # Analyze this stmt, to find the last open bracket (if any)
        # and last interesting character (if any).
        lastch = ""
        stack = []  # stack of open bracket indices
        push_stack = stack.append
        bracketing = [(p, 0)]
        while p < q:
            # suck up all except ()[]{}'"#\\
            m = _chew_ordinaryre(code, p, q)
            if m:
                # we skipped at least one boring char
                newp = m.end()
                # back up over totally boring whitespace
                i = newp - 1    # index of last boring char
                while i >= p and code[i] in " \t\n":
                    i -= 1
                if i >= p:
                    lastch = code[i]
                p = newp
                if p >= q:
                    break

            ch = code[p]

            if ch in "([{":
                push_stack(p)
                bracketing.append((p, len(stack)))
                lastch = ch
                p += 1
                continue

            if ch in ")]}":
                if stack:
                    del stack[-1]
                lastch = ch
                p += 1
                bracketing.append((p, len(stack)))
                continue

            if ch == '"' or ch == "'":
                # consume string
                # Note that study1 did this with a Python loop, but
                # we use a regexp here; the reason is speed in both
                # cases; the string may be huge, but study1 pre-squashed
                # strings to a couple of characters per line.  study1
                # also needed to keep track of newlines, and we don't
                # have to.
                bracketing.append((p, len(stack)+1))
                lastch = ch
                p = _match_stringre(code, p, q).end()
                bracketing.append((p, len(stack)))
                continue

            if ch == '#':
                # consume comment and trailing newline
                bracketing.append((p, len(stack)+1))
                p = code.find('\n', p, q) + 1
                assert p > 0
                bracketing.append((p, len(stack)))
                continue

            assert ch == '\\'
            p += 1     # beyond backslash
            assert p < q
            if code[p] != '\n':
                # the program is invalid, but can't complain
                lastch = ch + code[p]
            p += 1     # beyond escaped char

        # end while p < q:

        self.lastch = lastch
        self.lastopenbracketpos = stack[-1] if stack else None
        self.stmt_bracketing = tuple(bracketing)

    def compute_bracket_indent(self):
        """Return number of spaces the next line should be indented.

        Line continuation must be C_BRACKET.

        If something interesting follows the last open bracket, the
        result is the (tab-expanded) column of that first item;
        otherwise it is the bracket line's indentation plus one
        indentwidth.
        """
        self._study2()
        assert self.continuation == C_BRACKET
        j = self.lastopenbracketpos
        code = self.code
        n = len(code)
        origi = i = code.rfind('\n', 0, j) + 1
        j += 1     # one beyond open bracket
        # find first list item; set i to start of its line
        while j < n:
            m = _itemre(code, j)
            if m:
                j = m.end() - 1     # index of first interesting char
                extra = 0
                break
            else:
                # this line is junk; advance to next line
                i = j = code.find('\n', j) + 1
        else:
            # nothing interesting follows the bracket;
            # reproduce the bracket line's indentation + a level
            j = i = origi
            while code[j] in " \t":
                j += 1
            extra = self.indentwidth
        return len(code[i:j].expandtabs(self.tabwidth)) + extra

    def get_num_lines_in_stmt(self):
        """Return number of physical lines in last stmt.

        The statement doesn't have to be an interesting statement.  This is
        intended to be called when continuation is C_BACKSLASH.
        """
        self._study1()
        goodlines = self.goodlines
        return goodlines[-1] - goodlines[-2]

    def compute_backslash_indent(self):
        """Return number of spaces the next line should be indented.

        Line continuation must be C_BACKSLASH.  Also assume that the new
        line is the first one following the initial line of the stmt.

        If the initial line contains a top-level assignment '=' that is
        followed by something other than the continuation backslash,
        indent to just past the '='; otherwise indent to just past the
        first chunk of non-whitespace characters.
        """
        self._study2()
        assert self.continuation == C_BACKSLASH
        code = self.code
        i = self.stmt_start
        while code[i] in " \t":
            i += 1
        startpos = i

        # See whether the initial line starts an assignment stmt; i.e.,
        # look for an = operator
        endpos = code.find('\n', startpos) + 1
        found = level = 0
        while i < endpos:
            ch = code[i]
            if ch in "([{":
                level += 1
                i += 1
            elif ch in ")]}":
                if level:
                    level -= 1
                i += 1
            elif ch == '"' or ch == "'":
                i = _match_stringre(code, i, endpos).end()
            elif ch == '#':
                # This line is unreachable because the # makes a comment of
                # everything after it.
                break
            elif level == 0 and ch == '=' and \
                    (i == 0 or code[i-1] not in "=<>!") and \
                    code[i+1] != '=':
                found = 1
                break
            else:
                i += 1

        if found:
            # found a legit =, but it may be the last interesting
            # thing on the line
            i += 1     # move beyond the =
            found = re.match(r"\s*\\", code[i:endpos]) is None

        if not found:
            # oh well ... settle for moving beyond the first chunk
            # of non-whitespace chars
            i = startpos
            while code[i] not in " \t\n":
                i += 1

        return len(code[self.stmt_start:i].expandtabs(
                                     self.tabwidth)) + 1

    def get_base_indent_string(self):
        """Return the leading whitespace on the initial line of the last
        interesting stmt.
        """
        self._study2()
        i, n = self.stmt_start, self.stmt_end
        j = i
        code = self.code
        while j < n and code[j] in " \t":
            j += 1
        return code[i:j]

    def is_block_opener(self):
        "Return True if the last interesting statement opens a block."
        self._study2()
        return self.lastch == ':'

    def is_block_closer(self):
        "Return True if the last interesting statement closes a block."
        self._study2()
        return _closere(self.code, self.stmt_start) is not None

    def get_last_stmt_bracketing(self):
        """Return bracketing structure of the last interesting statement.

        The returned tuple is in the format defined in _study2().
        """
        self._study2()
        return self.stmt_bracketing
Miss Islington (bot)c59bc982018-02-21 20:09:39 -0800595
596
# When run as a script, run this module's unit tests.
if __name__ == '__main__':
    from unittest import main
    main('idlelib.idle_test.test_pyparse', verbosity=2)