blob: 1eeb9154d9061049155b5879246608b2b54fd6e7 [file] [log] [blame]
"""Define partial Python code Parser used by editor and hyperparser.

Instances of ParseMap are used with str.translate.

The following bound search and match functions are defined:
_synchre - start of popular statement;
_junkre - whitespace or comment line;
_match_stringre - string, possibly without closer;
_itemre - line that may have bracket structure start;
_closere - line that must be followed by dedent;
_chew_ordinaryre - non-special characters.
"""
David Scherer7aced172000-08-15 01:13:23 +000013import re
14import sys
15
# Why the last statement is continued onto the next line.
C_NONE = 0                 # Not continued at all.
C_BACKSLASH = 1            # Continued by a trailing backslash.
C_STRING_FIRST_LINE = 2    # Inside an unclosed string, on its first line.
C_STRING_NEXT_LINES = 3    # Inside an unclosed string, past its first line.
C_BRACKET = 4              # Inside an unclosed bracket structure.
David Scherer7aced172000-08-15 01:13:23 +000019
# Find what looks like the start of a popular statement.
#
# _synchre is the bound ``search`` method of the compiled pattern, so it
# is called as _synchre(string[, pos[, endpos]]) and returns a match
# object or None.  re.MULTILINE lets ``^`` match at the start of every
# line; a match therefore begins at the start of a line whose first
# token, after optional spaces and tabs, is one of the keywords listed
# below (the trailing \b rejects longer identifiers such as "define").
# Parser.find_good_parse_start() uses these matches as synch points.

_synchre = re.compile(r"""
    ^
    [ \t]*
    (?: while
    |   else
    |   def
    |   return
    |   assert
    |   break
    |   class
    |   continue
    |   elif
    |   try
    |   except
    |   raise
    |   import
    |   yield
    )
    \b
""", re.VERBOSE | re.MULTILINE).search
42
# Match blank line or non-indenting comment line.
#
# _junkre is the bound ``match`` method, called as _junkre(string, pos).
# It accepts optional spaces/tabs, then optionally a '#' that is
# immediately followed by a non-whitespace character and the rest of the
# line, then the newline.  A comment written as '#' + whitespace is
# therefore NOT junk.  Parser._study2() uses this to skip backwards over
# lines that should not count as the "last interesting statement".

_junkre = re.compile(r"""
    [ \t]*
    (?: \# \S .* )?
    \n
""", re.VERBOSE).match
50
# Match any flavor of string; the terminating quote is optional
# so that we're robust in the face of incomplete program text.
#
# _match_stringre is the bound ``match`` method, called as
# _match_stringre(string, pos[, endpos]) with pos at the opening quote.
# The four alternatives are, in order: triple-double-quoted,
# double-quoted, triple-single-quoted and single-quoted strings.  The
# single-line forms stop at an unescaped newline; re.DOTALL lets ``\\.``
# also consume a backslash followed by a newline.  Because every closer
# is optional, the pattern always matches when pos is at a quote char,
# which is what lets callers use ``.end()`` on the result unconditionally.

_match_stringre = re.compile(r"""
    \""" [^"\\]* (?:
                     (?: \\. | "(?!"") )
                     [^"\\]*
                 )*
    (?: \""" )?

|   " [^"\\\n]* (?: \\. [^"\\\n]* )* "?

|   ''' [^'\\]* (?:
                   (?: \\. | '(?!'') )
                   [^'\\]*
                )*
    (?: ''' )?

|   ' [^'\\\n]* (?: \\. [^'\\\n]* )* '?
""", re.VERBOSE | re.DOTALL).match
71
# Match a line that starts with something interesting;
# used to find the first item of a bracket structure.
#
# _itemre is the bound ``match`` method, called as _itemre(string, pos).
# After optional spaces/tabs it requires one character that is neither
# whitespace, '#', nor a backslash.  Parser.compute_bracket_indent()
# calls it just past an open bracket; no match means the rest of that
# line is blank, a comment, or a backslash continuation.

_itemre = re.compile(r"""
    [ \t]*
    [^\s#\\]    # if we match, m.end()-1 is the interesting char
""", re.VERBOSE).match
79
# Match start of statements that should be followed by a dedent.
#
# _closere is the bound ``match`` method, called as _closere(string, pos).
# It accepts any leading whitespace and then one of the keywords below as
# a whole word.  Parser.is_block_closer() applies it at the start of the
# last interesting statement.

_closere = re.compile(r"""
    \s*
    (?: return
    |   break
    |   continue
    |   raise
    |   pass
    )
    \b
""", re.VERBOSE).match
92
# Chew up non-special chars as quickly as possible.  If match is
# successful, m.end() less 1 is the index of the last boring char
# matched.  If match is unsuccessful, the string starts with an
# interesting char.
#
# _chew_ordinaryre is the bound ``match`` method, called as
# _chew_ordinaryre(string, pos, endpos).  "Interesting" characters are
# the ones excluded by the character class: any bracket, '#', either
# quote character, and the backslash.

_chew_ordinaryre = re.compile(r"""
    [^[\](){}#'"\\]+
""", re.VERBOSE).match
101
David Scherer7aced172000-08-15 01:13:23 +0000102
class ParseMap(dict):
    r"""Translation table that sends every unlisted code point to 'x'.

    Meant to be handed to str.translate (see Parser._study1).  Keys
    stored in the dict translate to their stored value; any other key
    is reported as ord('x') without being added to the dict.
    Example: keep whitespace and turn everything else into 'x'.

    >>> keepwhite = ParseMap((ord(c), ord(c)) for c in ' \t\n\r')
    >>> "a + b\tc\nd".translate(keepwhite)
    'x x x\tx\nx'
    """

    # Falling back to this method triples access time (see bpo-32940),
    # so keys that are looked up often should be stored explicitly.
    def __missing__(self, key):
        x_code_point = 120  # ord('x')
        return x_code_point
Tal Einat9b7f9e62014-07-16 16:33:36 +0300117
Tal Einat9b7f9e62014-07-16 16:33:36 +0300118
# Pre-populate every ASCII code point with 120 (ord('x')) so that
# translating ASCII text never falls back to ParseMap.__missing__, then
# override the handful of characters the parser needs to see.
trans = ParseMap.fromkeys(range(128), 120)
trans.update(str.maketrans("({[", "((("))  # Any open bracket => '('.
trans.update(str.maketrans(")}]", ")))"))  # Any close bracket => ')'.
trans.update(str.maketrans("\"'\\\n#", "\"'\\\n#"))  # Keep these as-is.
Tal Einat9b7f9e62014-07-16 16:33:36 +0300124
David Scherer7aced172000-08-15 01:13:23 +0000125
class Parser:
    """Analyze the tail of a chunk of Python source text.

    Typical use: call set_code() with text that ends in a newline,
    optionally trim leading context with find_good_parse_start() and
    set_lo(), then query the continuation type, the indentation helpers,
    or the bracketing of the last statement.  The analysis is done
    lazily in two cached passes, _study1() and _study2().
    """

    def __init__(self, indentwidth, tabwidth):
        """Remember the indent width and tab width used for computations.

        indentwidth is the extra indent added by compute_bracket_indent()
        when nothing follows the open bracket; tabwidth is passed to
        str.expandtabs when measuring indentation.
        """
        self.indentwidth = indentwidth
        self.tabwidth = tabwidth

    def set_code(self, s):
        """Set the text to analyze and discard any cached analysis.

        s must be empty or end with a newline; otherwise the assertion
        below fails.
        """
        assert len(s) == 0 or s[-1] == '\n'
        self.code = s
        self.study_level = 0

    def find_good_parse_start(self, is_char_in_string=None,
                              _synchre=_synchre):
        """
        Return index of a good place to begin parsing, as close to the
        end of the string as possible.  This will be the start of some
        popular stmt like "while" or "def" (see _synchre).  Return None
        if none found:  the caller should pass more prior context then,
        if possible, or if not (the entire program text up until the
        point of interest has already been tried) pass 0 to set_lo().

        This will be reliable iff given a reliable is_char_in_string()
        function, meaning that when it says "no", it's absolutely
        guaranteed that the char is not in a string.

        is_char_in_string is called with a single index into the code;
        when it is not supplied, None is returned immediately.  The
        _synchre parameter only binds the module-level search function
        as a local name.
        """
        code, pos = self.code, None

        if not is_char_in_string:
            # no clue -- make the caller pass everything
            return None

        # Peek back from the end for a good place to start,
        # but don't try too often; pos will be left None, or
        # bumped to a legitimate synch point.
        limit = len(code)
        for _ in range(5):
            i = code.rfind(":\n", 0, limit)
            if i < 0:
                break
            i = code.rfind('\n', 0, i) + 1  # start of colon line (-1+1=0)
            m = _synchre(code, i, limit)
            if m and not is_char_in_string(m.start()):
                pos = m.start()
                break
            limit = i
        if pos is None:
            # Nothing looks like a block-opener, or stuff does
            # but is_char_in_string keeps returning true; most likely
            # we're in or near a giant string, the colorizer hasn't
            # caught up enough to be helpful, or there simply *aren't*
            # any interesting stmts.  In any of these cases we're
            # going to have to parse the whole thing to be sure, so
            # give it one last try from the start, but stop wasting
            # time here regardless of the outcome.
            m = _synchre(code)
            if m and not is_char_in_string(m.start()):
                pos = m.start()
            return pos

        # Peeking back worked; look forward until _synchre no longer
        # matches.
        i = pos + 1
        while True:
            m = _synchre(code, i)
            if not m:
                break
            s, i = m.span()
            if not is_char_in_string(s):
                pos = s
        return pos

    def set_lo(self, lo):
        """Throw away the start of the string.

        Intended to be called with the result of find_good_parse_start().
        lo must be 0 or the index just past a newline.
        """
        assert lo == 0 or self.code[lo-1] == '\n'
        if lo > 0:
            self.code = self.code[lo:]
            # Results cached by _study1/_study2 are indices and line
            # numbers relative to the old text; force a fresh study.
            self.study_level = 0

    def _study1(self):
        """Find the line numbers of non-continuation lines.

        As quickly as humanly possible <wink>, find the line numbers (0-
        based) of the non-continuation lines.
        Creates self.{goodlines, continuation}.
        """
        if self.study_level >= 1:
            return
        self.study_level = 1

        # Map all uninteresting characters to "x", all open brackets
        # to "(", all close brackets to ")", then collapse runs of
        # uninteresting characters.  This can cut the number of chars
        # by a factor of 10-40, and so greatly speed the following loop.
        code = self.code
        code = code.translate(trans)
        code = code.replace('xxxxxxxx', 'x')
        code = code.replace('xxxx', 'x')
        code = code.replace('xx', 'x')
        code = code.replace('xx', 'x')
        code = code.replace('\nx', '\n')
        # Replacing x\n with \n would be incorrect because
        # x may be preceded by a backslash.

        # March over the squashed version of the program, accumulating
        # the line numbers of non-continued stmts, and determining
        # whether & why the last stmt is a continuation.
        continuation = C_NONE
        level = lno = 0     # level is nesting level; lno is line number
        self.goodlines = goodlines = [0]
        push_good = goodlines.append
        i, n = 0, len(code)
        while i < n:
            ch = code[i]
            i += 1

            # cases are checked in decreasing order of frequency
            if ch == 'x':
                continue

            if ch == '\n':
                lno += 1
                if level == 0:
                    push_good(lno)
                # else we're in an unclosed bracket structure
                continue

            if ch == '(':
                level += 1
                continue

            if ch == ')':
                if level:
                    level -= 1
                # else the program is invalid, but we can't complain
                continue

            if ch == '"' or ch == "'":
                # consume the string
                quote = ch
                if code[i-1:i+2] == quote * 3:
                    quote = quote * 3
                firstlno = lno
                w = len(quote) - 1
                i += w
                while i < n:
                    ch = code[i]
                    i += 1

                    if ch == 'x':
                        continue

                    if code[i-1:i+w] == quote:
                        i += w
                        break

                    if ch == '\n':
                        lno += 1
                        if w == 0:
                            # unterminated single-quoted string
                            if level == 0:
                                push_good(lno)
                            break
                        continue

                    if ch == '\\':
                        assert i < n
                        if code[i] == '\n':
                            lno += 1
                        i += 1
                        continue

                    # else comment char or paren inside string

                else:
                    # didn't break out of the loop, so we're still
                    # inside a string
                    if (lno - 1) == firstlno:
                        # before the previous \n in code, we were in the
                        # first line of the string
                        continuation = C_STRING_FIRST_LINE
                    else:
                        continuation = C_STRING_NEXT_LINES
                continue    # with outer loop

            if ch == '#':
                # consume the comment
                i = code.find('\n', i)
                assert i >= 0
                continue

            assert ch == '\\'
            assert i < n
            if code[i] == '\n':
                lno += 1
                if i+1 == n:
                    continuation = C_BACKSLASH
            i += 1

        # The last stmt may be continued for all 3 reasons.
        # String continuation takes precedence over bracket
        # continuation, which beats backslash continuation.
        if (continuation != C_STRING_FIRST_LINE
                and continuation != C_STRING_NEXT_LINES and level > 0):
            continuation = C_BRACKET
        self.continuation = continuation

        # Push the final line number as a sentinel value, regardless of
        # whether it's continued.
        assert (continuation == C_NONE) == (goodlines[-1] == lno)
        if goodlines[-1] != lno:
            push_good(lno)

    def get_continuation_type(self):
        """Return the C_* constant telling why the last stmt is continued.

        The result is C_NONE when the last statement is not continued.
        """
        self._study1()
        return self.continuation

    def _study2(self):
        """
        study1 was sufficient to determine the continuation status,
        but doing more requires looking at every character.  study2
        does this for the last interesting statement in the block.
        Creates:
            self.stmt_start, stmt_end
                slice indices of last interesting stmt
            self.stmt_bracketing
                the bracketing structure of the last interesting stmt,
                as a tuple of (index, nesting level from that index on)
                pairs; for example, for the statement "say(boo) or die",
                stmt_bracketing will be ((0, 0), (3, 1), (8, 0)).
                Strings and comments are treated as brackets, for that
                matter.
            self.lastch
                last interesting character before optional trailing comment
            self.lastopenbracketpos
                index of the last unclosed open bracket, or None if
                there is none (always set when continuation is C_BRACKET)
        """
        if self.study_level >= 2:
            return
        self._study1()
        self.study_level = 2

        # Set p and q to slice indices of last interesting stmt.
        code, goodlines = self.code, self.goodlines
        i = len(goodlines) - 1  # Index of newest line.
        p = len(code)  # End of goodlines[i]
        while i:
            assert p
            # Make p be the index of the stmt at line number goodlines[i].
            # Move p back to the stmt at line number goodlines[i-1].
            q = p
            for _ in range(goodlines[i-1], goodlines[i]):
                # tricky: sets p to 0 if no preceding newline
                p = code.rfind('\n', 0, p-1) + 1
            # The stmt code[p:q] isn't a continuation, but may be blank
            # or a non-indenting comment line.
            if _junkre(code, p):
                i -= 1
            else:
                break
        if i == 0:
            # nothing but junk!
            assert p == 0
            q = p
        self.stmt_start, self.stmt_end = p, q

        # Analyze this stmt, to find the last open bracket (if any)
        # and last interesting character (if any).
        lastch = ""
        stack = []  # stack of open bracket indices
        push_stack = stack.append
        bracketing = [(p, 0)]
        while p < q:
            # suck up all except ()[]{}'"#\\
            m = _chew_ordinaryre(code, p, q)
            if m:
                # we skipped at least one boring char
                newp = m.end()
                # back up over totally boring whitespace
                i = newp - 1    # index of last boring char
                while i >= p and code[i] in " \t\n":
                    i -= 1
                if i >= p:
                    lastch = code[i]
                p = newp
                if p >= q:
                    break

            ch = code[p]

            if ch in "([{":
                push_stack(p)
                bracketing.append((p, len(stack)))
                lastch = ch
                p += 1
                continue

            if ch in ")]}":
                if stack:
                    del stack[-1]
                lastch = ch
                p += 1
                bracketing.append((p, len(stack)))
                continue

            if ch == '"' or ch == "'":
                # consume string
                # Note that study1 did this with a Python loop, but
                # we use a regexp here; the reason is speed in both
                # cases; the string may be huge, but study1 pre-squashed
                # strings to a couple of characters per line.  study1
                # also needed to keep track of newlines, and we don't
                # have to.
                bracketing.append((p, len(stack)+1))
                lastch = ch
                p = _match_stringre(code, p, q).end()
                bracketing.append((p, len(stack)))
                continue

            if ch == '#':
                # consume comment and trailing newline
                bracketing.append((p, len(stack)+1))
                p = code.find('\n', p, q) + 1
                assert p > 0
                bracketing.append((p, len(stack)))
                continue

            assert ch == '\\'
            p += 1     # beyond backslash
            assert p < q
            if code[p] != '\n':
                # the program is invalid, but can't complain
                lastch = ch + code[p]
            p += 1     # beyond escaped char

        # end while p < q:

        self.lastch = lastch
        self.lastopenbracketpos = stack[-1] if stack else None
        self.stmt_bracketing = tuple(bracketing)

    def compute_bracket_indent(self):
        """Return number of spaces the next line should be indented.

        Line continuation must be C_BRACKET.
        """
        self._study2()
        assert self.continuation == C_BRACKET
        j = self.lastopenbracketpos
        code = self.code
        n = len(code)
        origi = i = code.rfind('\n', 0, j) + 1
        j += 1     # one beyond open bracket
        # find first list item; set i to start of its line
        while j < n:
            m = _itemre(code, j)
            if m:
                j = m.end() - 1     # index of first interesting char
                extra = 0
                break
            # this line is junk; advance to next line
            i = j = code.find('\n', j) + 1
        else:
            # nothing interesting follows the bracket;
            # reproduce the bracket line's indentation + a level
            j = i = origi
            while code[j] in " \t":
                j += 1
            extra = self.indentwidth
        return len(code[i:j].expandtabs(self.tabwidth)) + extra

    def get_num_lines_in_stmt(self):
        """Return number of physical lines in last stmt.

        The statement doesn't have to be an interesting statement.  This is
        intended to be called when continuation is C_BACKSLASH.  Return 0
        when the code is empty.
        """
        self._study1()
        goodlines = self.goodlines
        if len(goodlines) < 2:
            # Empty code: _study1 leaves only the initial 0 entry, so
            # there is no statement to measure.
            return 0
        return goodlines[-1] - goodlines[-2]

    def compute_backslash_indent(self):
        """Return number of spaces the next line should be indented.

        Line continuation must be C_BACKSLASH.  Also assume that the new
        line is the first one following the initial line of the stmt.
        """
        self._study2()
        assert self.continuation == C_BACKSLASH
        code = self.code
        i = self.stmt_start
        while code[i] in " \t":
            i += 1
        startpos = i

        # See whether the initial line starts an assignment stmt; i.e.,
        # look for an = operator
        endpos = code.find('\n', startpos) + 1
        found = False
        level = 0
        while i < endpos:
            ch = code[i]
            if ch in "([{":
                level += 1
                i += 1
            elif ch in ")]}":
                if level:
                    level -= 1
                i += 1
            elif ch == '"' or ch == "'":
                i = _match_stringre(code, i, endpos).end()
            elif ch == '#':
                # This line is unreachable because the # makes a comment of
                # everything after it.
                break
            elif (level == 0 and ch == '='
                    and (i == 0 or code[i-1] not in "=<>!")
                    and code[i+1] != '='):
                found = True
                break
            else:
                i += 1

        if found:
            # found a legit =, but it may be the last interesting
            # thing on the line
            i += 1     # move beyond the =
            found = re.match(r"\s*\\", code[i:endpos]) is None

        if not found:
            # oh well ... settle for moving beyond the first chunk
            # of non-whitespace chars
            i = startpos
            while code[i] not in " \t\n":
                i += 1

        return len(code[self.stmt_start:i].expandtabs(self.tabwidth)) + 1

    def get_base_indent_string(self):
        """Return the leading whitespace on the initial line of the last
        interesting stmt.
        """
        self._study2()
        i, n = self.stmt_start, self.stmt_end
        j = i
        code = self.code
        while j < n and code[j] in " \t":
            j += 1
        return code[i:j]

    def is_block_opener(self):
        "Return True if the last interesting statement opens a block."
        self._study2()
        return self.lastch == ':'

    def is_block_closer(self):
        "Return True if the last interesting statement closes a block."
        self._study2()
        return _closere(self.code, self.stmt_start) is not None

    def get_last_stmt_bracketing(self):
        """Return bracketing structure of the last interesting statement.

        The returned tuple is in the format defined in _study2().
        """
        self._study2()
        return self.stmt_bracketing
Cheryl Sabellac84cf6c2018-02-21 22:48:36 -0500595
596
if __name__ == '__main__':
    # Run this module's unit tests when executed as a script.
    import unittest
    unittest.main('idlelib.idle_test.test_pyparse', verbosity=2)