blob: e3b6e1f66d30817545aa4753618705db8952116d [file] [log] [blame]
Guido van Rossum8113cdc1999-06-01 19:49:21 +00001import string
2import re
3import sys
4
# Continuation codes: the reason the last statement is continued onto the
# next line.  C_NONE means it is not continued at all; the others name the
# construct that is still open (trailing backslash, string, bracket).
C_NONE, C_BACKSLASH, C_STRING, C_BRACKET = 0, 1, 2, 3
7
if 0:   # for throwaway debugging output; flip to 1 to enable
    def dump(*stuff):
        """Write str() of each argument, space-separated, plus a newline.

        Output goes to sys.__stdout__ rather than sys.stdout.
        """
        import sys
        # str.join replaces string.join, which no longer exists in Python 3.
        sys.__stdout__.write(" ".join(map(str, stuff)) + "\n")
12
# Find a def or class stmt.
# _defclassre is the bound .search of the compiled pattern, so it is called
# as _defclassre(text, startpos) and returns a match object or None.
# Because of re.MULTILINE, "^" anchors at the start of any line.  After
# optional leading spaces/tabs the pattern accepts either
#     def NAME (
# or
#     class NAME, an optional parenthesized part, then ":"
# re.VERBOSE means whitespace in the pattern below is layout only, except
# inside the [ \t] character classes.
_defclassre = re.compile(r"""
    ^
    [ \t]*
    (?:
        def   [ \t]+ [a-zA-Z_]\w* [ \t]* \(
    |   class [ \t]+ [a-zA-Z_]\w* [ \t]*
          (?: \( .* \) )?
          [ \t]* :
    )
""", re.VERBOSE | re.MULTILINE).search
24
# Match a blank line or a non-indenting comment line.
# _junkre is the bound .match of the compiled pattern; it is called as
# _junkre(text, pos) and matches only if a junk line starts exactly at pos.
# A junk line is optional spaces/tabs, optionally a comment whose "#" is
# immediately followed by a character other than space, tab or newline,
# and then the terminating newline.  A comment written as "# text" (hash
# followed by a space) therefore does not count as junk.
_junkre = re.compile(r"""
    [ \t]*
    (?: \# [^ \t\n] .* )?
    \n
""", re.VERBOSE).match
31
# Match any flavor of string; the terminating quote is optional
# so that we're robust in the face of incomplete program text.
# _match_stringre is the bound .match of the compiled pattern; callers
# invoke it as _match_stringre(text, pos, endpos) with pos at the opening
# quote and use .end() of the result to skip past the string.
# The four alternatives are, in order: triple-double-quoted,
# single-line double-quoted, triple-single-quoted, and single-line
# single-quoted.  The triple-quoted forms are tried first so that a
# leading """ or ''' is not taken for an empty one-line string.  In every
# form a backslash consumes the following character (re.DOTALL lets that
# character be a newline); the one-line forms otherwise stop at a newline.
_match_stringre = re.compile(r"""
    \""" [^"\\]* (?:
                     (?: \\. | "(?!"") )
                     [^"\\]*
                 )*
    (?: \""" )?

|   " [^"\\\n]* (?: \\. [^"\\\n]* )* "?

|   ''' [^'\\]* (?:
                   (?: \\. | '(?!'') )
                   [^'\\]*
                )*
    (?: ''' )?

|   ' [^'\\\n]* (?: \\. [^'\\\n]* )* '?
""", re.VERBOSE | re.DOTALL).match
51
# Match a line that doesn't start with something interesting;
# used to skip junk lines when searching for the first element
# of a bracket structure.
# _not_itemre is the bound .match of the compiled pattern, called as
# _not_itemre(text, pos).  It succeeds when, after optional spaces/tabs,
# the next character is "#" (a comment), a newline (nothing more on the
# line), or a backslash.
_not_itemre = re.compile(r"""
    [ \t]*
    [#\n\\]
""", re.VERBOSE).match
59
# Match the start of stmts that should be followed by a dedent.
# _closere is the bound .match of the compiled pattern, called as
# _closere(text, pos).  After any leading whitespace it requires one of
# the keywords return, break, continue, raise or pass; the trailing \b
# keeps identifiers that merely begin with a keyword (e.g. "passed")
# from matching.
_closere = re.compile(r"""
    \s*
    (?: return
    |   break
    |   continue
    |   raise
    |   pass
    )
    \b
""", re.VERBOSE).match
71
72# Build translation table to map uninteresting chars to "x", open
73# brackets to "(", and close brackets to ")".
74
75_tran = ['x'] * 256
76for ch in "({[":
77 _tran[ord(ch)] = '('
78for ch in ")}]":
79 _tran[ord(ch)] = ')'
80for ch in "\"'\\\n#":
81 _tran[ord(ch)] = ch
82_tran = string.join(_tran, '')
83del ch
84
class Parser:
    """Analyze the tail of a chunk of Python source to guide indentation.

    Feed text with set_str() (optionally trimmed first with
    find_last_def_or_class() and set_lo()), then ask about the last
    statement: whether and why it is continued, how far a continuation
    line should be indented, its base indentation, and whether it opens
    or closes a block.

    The analysis is done lazily in two stages, _study1 and _study2; the
    attribute study_level records how far it has progressed for the
    current text.
    """

    def __init__(self, indentwidth, tabwidth):
        """Remember the width of one indent level and of a tab stop."""
        self.indentwidth = indentwidth
        self.tabwidth = tabwidth

    def set_str(self, str):
        """Install the text to analyze and forget any earlier analysis.

        The text must be empty or end with a newline.
        """
        assert len(str) == 0 or str[-1] == '\n'
        self.str = str
        self.study_level = 0

    def find_last_def_or_class(self, _defclassre=_defclassre):
        """Return the index of the start of the last (probable!) def or
        class stmt, or None if none is found.

        It's only probable because we can't know whether we're in a
        string without reparsing from the start of the file -- and
        that's too slow to bear.

        Ack, hack: in the shell window this kills us, because there's no
        way to tell the difference between output, >>> etc and user
        input.  Indeed, IDLE's first output line makes the rest look
        like it's in an unclosed paren!:
            Python 1.5.2 (#0, Apr 13 1999, ...
        So when no def/class is found, the position just after the last
        occurrence of newline + sys.ps1 is returned instead, and self.str
        is patched so that position starts a line.
        """
        text, pos = self.str, None
        # Keep searching from the end of the previous hit; the start of
        # the final hit is the answer.
        m = _defclassre(text, 0)
        while m:
            pos, i = m.span()
            m = _defclassre(text, i)
        if pos is None:
            # hack for shell window; sys.ps1 only exists when a prompt
            # has been set up, so skip the hack if it is missing
            prompt = getattr(sys, 'ps1', None)
            if prompt is not None:
                ps1 = '\n' + prompt
                i = text.rfind(ps1)
                if i >= 0:
                    pos = i + len(ps1)
                    # Overwrite the prompt's last character with a
                    # newline so that set_lo(pos) finds a line boundary
                    # immediately before pos.
                    self.str = text[:pos-1] + '\n' + text[pos:]
        return pos

    def set_lo(self, lo):
        """Throw away the start of the text, keeping self.str[lo:].

        Intended to be called with find_last_def_or_class's result.
        lo must be 0 or the index of the first character of a line.
        """
        assert lo == 0 or self.str[lo-1] == '\n'
        if lo > 0:
            self.str = self.str[lo:]

    def _study1(self, _replace=str.replace, _find=str.find):
        """As quickly as humanly possible <wink>, find the line numbers
        (0-based) of the non-continuation lines.

        Creates:
            self.stmts
                line numbers at which a statement starts, beginning
                with 0 and ending with the total line count as a
                sentinel
            self.continuation
                one of the C_* codes: whether and why the last stmt
                is continued

        Does nothing if this stage has already run for the current text.
        """
        if self.study_level >= 1:
            return
        self.study_level = 1

        # Map all uninteresting characters to "x", all open brackets
        # to "(", all close brackets to ")", then collapse runs of
        # uninteresting characters.  This can cut the number of chars
        # by a factor of 10-40, and so greatly speed the following loop.
        text = self.str.translate(_tran)
        for run in ('xxxxxxxx', 'xxxx', 'xx', 'xx'):
            text = _replace(text, run, 'x')
        text = _replace(text, '\nx', '\n')
        # note that replacing x\n with \n would be incorrect, because
        # x may be preceded by a backslash

        # March over the squashed version of the program, accumulating
        # the line numbers of non-continued stmts, and determining
        # whether & why the last stmt is a continuation.
        continuation = C_NONE
        level = lno = 0     # level is nesting level; lno is line number
        self.stmts = stmts = [0]
        push_stmt = stmts.append
        i, n = 0, len(text)
        while i < n:
            ch = text[i]
            # cases are checked in decreasing order of frequency

            if ch == 'x':
                i += 1
                continue

            if ch == '\n':
                lno += 1
                if level == 0:
                    push_stmt(lno)
                # else we're in an unclosed bracket structure
                i += 1
                continue

            if ch == '(':
                level += 1
                i += 1
                continue

            if ch == ')':
                if level:
                    level -= 1
                # else the program is invalid, but we can't complain
                i += 1
                continue

            if ch == '"' or ch == "'":
                # consume the string
                quote = ch
                if text[i:i+3] == quote * 3:
                    quote = quote * 3
                w = len(quote)
                i += w
                while i < n:
                    ch = text[i]
                    if ch == 'x':
                        i += 1
                        continue

                    if text[i:i+w] == quote:
                        i += w
                        break

                    if ch == '\n':
                        lno += 1
                        i += 1
                        if w == 1:
                            # unterminated single-quoted string
                            if level == 0:
                                push_stmt(lno)
                            break
                        continue

                    if ch == '\\':
                        assert i+1 < n
                        if text[i+1] == '\n':
                            lno += 1
                        i += 2
                        continue

                    # else comment char or paren inside string
                    i += 1

                else:
                    # Didn't break out of the loop, so the text ends
                    # inside a string: either an unterminated
                    # triple-quoted string, or a single-quoted string
                    # whose final newline was escaped by a backslash.
                    # (This used to assert w == 3, which blew up in
                    # the second case.)
                    continuation = C_STRING
                continue

            if ch == '#':
                # consume the comment
                i = _find(text, '\n', i)
                assert i >= 0
                continue

            assert ch == '\\'
            assert i+1 < n
            if text[i+1] == '\n':
                lno += 1
                if i+2 == n:
                    continuation = C_BACKSLASH
            i += 2

        # Push the final line number as a sentinel value, regardless of
        # whether it's continued.
        if stmts[-1] != lno:
            push_stmt(lno)

        # The last stmt may be continued for all 3 reasons.
        # String continuation takes precedence over bracket
        # continuation, which beats backslash continuation.
        if continuation != C_STRING and level > 0:
            continuation = C_BRACKET
        self.continuation = continuation

    def get_continuation_type(self):
        """Return the C_* code saying whether/why the last stmt continues."""
        self._study1()
        return self.continuation

    def _study2(self, _rfind=str.rfind, _find=str.find,
                _ws=string.whitespace):
        """Look at every character of the last interesting statement.

        study1 was sufficient to determine the continuation status,
        but doing more requires looking at every character.  study2
        does this for the last interesting statement in the block.
        Creates:
            self.stmt_start, stmt_end
                slice indices of last interesting stmt
            self.lastch
                last non-whitespace character before optional trailing
                comment ("" if there is none)
            self.lastopenbracketpos
                index of the last still-open bracket in that stmt, or
                None if it has no open bracket

        Does nothing if this stage has already run for the current text.
        """
        if self.study_level >= 2:
            return
        self._study1()
        self.study_level = 2

        self.lastch = ""

        # Set p and q to slice indices of last interesting stmt.
        text, stmts = self.str, self.stmts
        i = len(stmts) - 1
        p = len(text)   # index of newest line
        q = p           # stays put only when the text is empty
        while i:
            assert p
            # p is the index of the stmt at line number stmts[i].
            # Move p back to the stmt at line number stmts[i-1].
            q = p
            for nothing in range(stmts[i-1], stmts[i]):
                # tricky:  sets p to 0 if no preceding newline
                p = _rfind(text, '\n', 0, p-1) + 1
            # The stmt text[p:q] isn't a continuation, but may be blank
            # or a non-indenting comment line.
            if not _junkre(text, p):
                break
            i -= 1
        self.stmt_start, self.stmt_end = p, q

        # Analyze this stmt, to find the last open bracket (if any)
        # and last interesting character (if any).
        stack = []  # stack of open bracket indices
        push_stack = stack.append
        while p < q:
            ch = text[p]
            if ch == '"' or ch == "'":
                # consume string
                # Note that study1 did this with a Python loop, but
                # we use a regexp here; the reason is speed in both
                # cases; the string may be huge, but study1 pre-squashed
                # strings to a couple of characters per line.  study1
                # also needed to keep track of newlines, and we don't
                # have to.
                self.lastch = ch
                p = _match_stringre(text, p, q).end()
                continue

            if ch == '#':
                # consume comment and trailing newline
                p = _find(text, '\n', p, q) + 1
                assert p > 0
                continue

            if ch == '\\':
                assert p+1 < q
                if text[p+1] != '\n':
                    # the program is invalid, but can't complain
                    self.lastch = text[p:p+2]
                p += 2
                continue

            if ch not in _ws:
                self.lastch = ch
                if ch in "([{":
                    push_stack(p)
                elif ch in ")]}" and stack:
                    del stack[-1]
            p += 1

        # Always (re)define the attribute so a value left over from an
        # earlier text can never be picked up by mistake.
        if stack:
            self.lastopenbracketpos = stack[-1]
        else:
            self.lastopenbracketpos = None

    def compute_bracket_indent(self, _find=str.find):
        """Assuming continuation is C_BRACKET, return the number of
        spaces the next line should be indented.

        If an item follows the open bracket, the result lines up with
        that item; otherwise it is the bracket line's own indentation
        plus one indent level.
        """
        self._study2()
        assert self.continuation == C_BRACKET
        j = self.lastopenbracketpos
        text = self.str
        n = len(text)
        # start of the line holding the open bracket
        origi = i = text.rfind('\n', 0, j) + 1
        j += 1  # one beyond the open bracket
        # find first list item
        while _not_itemre(text, j):
            # this line is junk; advance to the next line
            i = _find(text, '\n', j)
            if i < 0:
                break
            j = i = i+1
        if i < 0 or j >= n:
            # nothing interesting follows the bracket;
            # reproduce the bracket line's indentation + a level
            j = i = origi
            extra = self.indentwidth
        else:
            # the first list item begins on this line; line up with
            # the first interesting character
            extra = 0
        while text[j] in " \t":
            j += 1
        return len(text[i:j].expandtabs(self.tabwidth)) + extra

    def get_num_lines_in_stmt(self):
        """Return number of physical lines in last stmt.

        This counts whether or not it's an interesting stmt!  It is
        intended to be called when continuation is C_BACKSLASH.
        """
        self._study1()
        stmts = self.stmts
        return stmts[-1] - stmts[-2]

    def compute_backslash_indent(self):
        """Assuming continuation is C_BACKSLASH, return the number of
        spaces the next line should be indented.

        Also assumes the new line is the first one following the
        initial line of the stmt.  The result lines up just past an
        assignment "=" on that line when something follows it, else
        just past the line's first chunk of non-whitespace characters.
        """
        self._study2()
        assert self.continuation == C_BACKSLASH
        text = self.str
        i = self.stmt_start
        while text[i] in " \t":
            i += 1
        startpos = i
        endpos = text.find('\n', startpos) + 1
        found = level = 0
        # Look for an assignment "=" outside brackets and strings.
        while i < endpos:
            ch = text[i]
            if ch in "([{":
                level += 1
                i += 1
            elif ch in ")]}":
                if level:
                    level -= 1
                i += 1
            elif ch == '"' or ch == "'":
                i = _match_stringre(text, i, endpos).end()
            elif ch == '#':
                break
            elif (level == 0 and ch == '=' and
                  (i == 0 or text[i-1] not in "=<>!") and
                  text[i+1] != '='):
                # a bare "=", not part of ==, <=, >= or !=
                found = 1
                break
            else:
                i += 1

        if found:
            # found a legit =, but it may be the last interesting
            # thing on the line
            i += 1  # move beyond the =
            found = re.match(r"\s*\\", text[i:endpos]) is None

        if not found:
            # oh well ... settle for moving beyond the first chunk
            # of non-whitespace chars
            i = startpos
            while text[i] not in " \t\n":
                i += 1

        return len(text[self.stmt_start:i].expandtabs(self.tabwidth)) + 1

    def get_base_indent_string(self):
        """Return the leading whitespace on the initial line of the last
        interesting stmt.
        """
        self._study2()
        i, n = self.stmt_start, self.stmt_end
        assert i is not None
        j = i
        text = self.str
        while j < n and text[j] in " \t":
            j += 1
        return text[i:j]

    def is_block_opener(self):
        """Did the last interesting stmt open a block?

        True when its last interesting character is a colon.
        """
        self._study2()
        return self.lastch == ':'

    def is_block_closer(self):
        """Did the last interesting stmt close a block?

        True when it starts with return, break, continue, raise or pass.
        """
        self._study2()
        return _closere(self.str, self.stmt_start) is not None