Blame - Tools/idle/PyParse.py - platform/external/python/cpython2

blob: e3b6e1f66d30817545aa4753618705db8952116d [file] [log] [blame]

Guido van Rossum	8113cdc	1999-06-01 19:49:21 +0000	[diff] [blame]	1	import string
				2	import re
				3	import sys
				4
				5	# Reason last stmt is continued (or C_NONE if it's not).
				6	C_NONE, C_BACKSLASH, C_STRING, C_BRACKET = range(4)
				7
				8	if 0: # for throwaway debugging output
				9	def dump(*stuff):
				10	import sys
				11	sys.__stdout__.write(string.join(map(str, stuff), " ") + "\n")
				12
				13	# find a def or class stmt
				14	_defclassre = re.compile(r"""
				15	^
				16	[ \t]*
				17	(?:
				18	def [ \t]+ [a-zA-Z_]\w* [ \t]* \(
				19	\| class [ \t]+ [a-zA-Z_]\w* [ \t]*
				20	(?: \( .* \) )?
				21	[ \t]* :
				22	)
				23	""", re.VERBOSE \| re.MULTILINE).search
				24
				25	# match blank line or non-indenting comment line
				26	_junkre = re.compile(r"""
				27	[ \t]*
				28	(?: \# [^ \t\n] .* )?
				29	\n
				30	""", re.VERBOSE).match
				31
				32	# match any flavor of string; the terminating quote is optional
				33	# so that we're robust in the face of incomplete program text
				34	_match_stringre = re.compile(r"""
				35	\""" [^"\\]* (?:
				36	(?: \\. \| "(?!"") )
				37	[^"\\]*
				38	)*
				39	(?: \""" )?
				40
				41	\| " [^"\\\n]* (?: \\. [^"\\\n]* )* "?
				42
				43	\| ''' [^'\\]* (?:
				44	(?: \\. \| '(?!'') )
				45	[^'\\]*
				46	)*
				47	(?: ''' )?
				48
				49	\| ' [^'\\\n]* (?: \\. [^'\\\n]* )* '?
				50	""", re.VERBOSE \| re.DOTALL).match
				51
				52	# match a line that doesn't start with something interesting;
				53	# used to skip junk lines when searching for the first element
				54	# of a bracket structure
				55	_not_itemre = re.compile(r"""
				56	[ \t]*
				57	[#\n\\]
				58	""", re.VERBOSE).match
				59
				60	# match start of stmts that should be followed by a dedent
				61	_closere = re.compile(r"""
				62	\s*
				63	(?: return
				64	\| break
				65	\| continue
				66	\| raise
				67	\| pass
				68	)
				69	\b
				70	""", re.VERBOSE).match
				71
				72	# Build translation table to map uninteresting chars to "x", open
				73	# brackets to "(", and close brackets to ")".
				74
				75	_tran = ['x'] * 256
				76	for ch in "({[":
				77	_tran[ord(ch)] = '('
				78	for ch in ")}]":
				79	_tran[ord(ch)] = ')'
				80	for ch in "\"'\\\n#":
				81	_tran[ord(ch)] = ch
				82	_tran = string.join(_tran, '')
				83	del ch
				84
				85	class Parser:
				86
				87	def __init__(self, indentwidth, tabwidth):
				88	self.indentwidth = indentwidth
				89	self.tabwidth = tabwidth
				90
				91	def set_str(self, str):
				92	assert len(str) == 0 or str[-1] == '\n'
				93	self.str = str
				94	self.study_level = 0
				95
				96	# Return index of start of last (probable!) def or class stmt, or
				97	# None if none found. It's only probable because we can't know
				98	# whether we're in a string without reparsing from the start of
				99	# the file -- and that's too slow to bear.
				100	#
				101	# Ack, hack: in the shell window this kills us, because there's
				102	# no way to tell the differences between output, >>> etc and
				103	# user input. Indeed, IDLE's first output line makes the rest
				104	# look like it's in an unclosed paren!:
				105	# Python 1.5.2 (#0, Apr 13 1999, ...
				106
				107	def find_last_def_or_class(self, _defclassre=_defclassre):
				108	str, pos = self.str, None
				109	i = 0
				110	while 1:
				111	m = _defclassre(str, i)
				112	if m:
				113	pos, i = m.span()
				114	else:
				115	break
				116	if pos is None:
				117	# hack for shell window
				118	ps1 = '\n' + sys.ps1
				119	i = string.rfind(str, ps1)
				120	if i >= 0:
				121	pos = i + len(ps1)
				122	self.str = str[:pos-1] + '\n' + str[pos:]
				123	return pos
				124
				125	# Throw away the start of the string. Intended to be called with
				126	# find_last_def_or_class's result.
				127
				128	def set_lo(self, lo):
				129	assert lo == 0 or self.str[lo-1] == '\n'
				130	if lo > 0:
				131	self.str = self.str[lo:]
				132
				133	# As quickly as humanly possible <wink>, find the line numbers (0-
				134	# based) of the non-continuation lines.
				135	# Creates self.{stmts, continuation}.
				136
				137	def _study1(self, _replace=string.replace, _find=string.find):
				138	if self.study_level >= 1:
				139	return
				140	self.study_level = 1
				141
				142	# Map all uninteresting characters to "x", all open brackets
				143	# to "(", all close brackets to ")", then collapse runs of
				144	# uninteresting characters. This can cut the number of chars
				145	# by a factor of 10-40, and so greatly speed the following loop.
				146	str = self.str
				147	str = string.translate(str, _tran)
				148	str = _replace(str, 'xxxxxxxx', 'x')
				149	str = _replace(str, 'xxxx', 'x')
				150	str = _replace(str, 'xx', 'x')
				151	str = _replace(str, 'xx', 'x')
				152	str = _replace(str, '\nx', '\n')
				153	# note that replacing x\n with \n would be incorrect, because
				154	# x may be preceded by a backslash
				155
				156	# March over the squashed version of the program, accumulating
				157	# the line numbers of non-continued stmts, and determining
				158	# whether & why the last stmt is a continuation.
				159	continuation = C_NONE
				160	level = lno = 0 # level is nesting level; lno is line number
				161	self.stmts = stmts = [0]
				162	push_stmt = stmts.append
				163	i, n = 0, len(str)
				164	while i < n:
				165	ch = str[i]
				166	# cases are checked in decreasing order of frequency
				167
				168	if ch == 'x':
				169	i = i+1
				170	continue
				171
				172	if ch == '\n':
				173	lno = lno + 1
				174	if level == 0:
				175	push_stmt(lno)
				176	# else we're in an unclosed bracket structure
				177	i = i+1
				178	continue
				179
				180	if ch == '(':
				181	level = level + 1
				182	i = i+1
				183	continue
				184
				185	if ch == ')':
				186	if level:
				187	level = level - 1
				188	# else the program is invalid, but we can't complain
				189	i = i+1
				190	continue
				191
				192	if ch == '"' or ch == "'":
				193	# consume the string
				194	quote = ch
				195	if str[i:i+3] == quote * 3:
				196	quote = quote * 3
				197	w = len(quote)
				198	i = i+w
				199	while i < n:
				200	ch = str[i]
				201	if ch == 'x':
				202	i = i+1
				203	continue
				204
				205	if str[i:i+w] == quote:
				206	i = i+w
				207	break
				208
				209	if ch == '\n':
				210	lno = lno + 1
				211	i = i+1
				212	if w == 1:
				213	# unterminated single-quoted string
				214	if level == 0:
				215	push_stmt(lno)
				216	break
				217	continue
				218
				219	if ch == '\\':
				220	assert i+1 < n
				221	if str[i+1] == '\n':
				222	lno = lno + 1
				223	i = i+2
				224	continue
				225
				226	# else comment char or paren inside string
				227	i = i+1
				228
				229	else:
				230	# didn't break out of the loop, so it's an
				231	# unterminated triple-quoted string
				232	assert w == 3
				233	continuation = C_STRING
				234	continue
				235
				236	if ch == '#':
				237	# consume the comment
				238	i = _find(str, '\n', i)
				239	assert i >= 0
				240	continue
				241
				242	assert ch == '\\'
				243	assert i+1 < n
				244	if str[i+1] == '\n':
				245	lno = lno + 1
				246	if i+2 == n:
				247	continuation = C_BACKSLASH
				248	i = i+2
				249
				250	# Push the final line number as a sentinel value, regardless of
				251	# whether it's continued.
				252	if stmts[-1] != lno:
				253	push_stmt(lno)
				254
				255	# The last stmt may be continued for all 3 reasons.
				256	# String continuation takes precedence over bracket
				257	# continuation, which beats backslash continuation.
				258	if continuation != C_STRING and level > 0:
				259	continuation = C_BRACKET
				260	self.continuation = continuation
				261
				262	def get_continuation_type(self):
				263	self._study1()
				264	return self.continuation
				265
				266	# study1 was sufficient to determine the continuation status,
				267	# but doing more requires looking at every character. study2
				268	# does this for the last interesting statement in the block.
				269	# Creates:
				270	# self.stmt_start, stmt_end
				271	# slice indices of last interesting stmt
				272	# self.lastch
				273	# last non-whitespace character before optional trailing
				274	# comment
				275	# self.lastopenbracketpos
				276	# if continuation is C_BRACKET, index of last open bracket
				277
				278	def _study2(self, _rfind=string.rfind, _find=string.find,
				279	_ws=string.whitespace):
				280	if self.study_level >= 2:
				281	return
				282	self._study1()
				283	self.study_level = 2
				284
				285	self.lastch = ""
				286
				287	# Set p and q to slice indices of last interesting stmt.
				288	str, stmts = self.str, self.stmts
				289	i = len(stmts) - 1
				290	p = len(str) # index of newest line
				291	found = 0
				292	while i:
				293	assert p
				294	# p is the index of the stmt at line number stmts[i].
				295	# Move p back to the stmt at line number stmts[i-1].
				296	q = p
				297	for nothing in range(stmts[i-1], stmts[i]):
				298	# tricky: sets p to 0 if no preceding newline
				299	p = _rfind(str, '\n', 0, p-1) + 1
				300	# The stmt str[p:q] isn't a continuation, but may be blank
				301	# or a non-indenting comment line.
				302	if _junkre(str, p):
				303	i = i-1
				304	else:
				305	found = 1
				306	break
				307	self.stmt_start, self.stmt_end = p, q
				308
				309	# Analyze this stmt, to find the last open bracket (if any)
				310	# and last interesting character (if any).
				311	stack = [] # stack of open bracket indices
				312	push_stack = stack.append
				313	while p < q:
				314	ch = str[p]
				315	if ch == '"' or ch == "'":
				316	# consume string
				317	# Note that study1 did this with a Python loop, but
				318	# we use a regexp here; the reason is speed in both
				319	# cases; the string may be huge, but study1 pre-squashed
				320	# strings to a couple of characters per line. study1
				321	# also needed to keep track of newlines, and we don't
				322	# have to.
				323	self.lastch = ch
				324	p = _match_stringre(str, p, q).end()
				325	continue
				326
				327	if ch == '#':
				328	# consume comment and trailing newline
				329	p = _find(str, '\n', p, q) + 1
				330	assert p > 0
				331	continue
				332
				333	if ch == '\\':
				334	assert p+1 < q
				335	if str[p+1] != '\n':
				336	# the program is invalid, but can't complain
				337	self.lastch = str[p:p+2]
				338	p = p+2
				339	continue
				340
				341	if ch not in _ws:
				342	self.lastch = ch
				343	if ch in "([{":
				344	push_stack(p)
				345	elif ch in ")]}" and stack:
				346	del stack[-1]
				347	p = p+1
				348
				349	# end while p < q:
				350
				351	if stack:
				352	self.lastopenbracketpos = stack[-1]
				353
				354	# Assuming continuation is C_BRACKET, return the number
				355	# of spaces the next line should be indented.
				356
				357	def compute_bracket_indent(self, _find=string.find):
				358	self._study2()
				359	assert self.continuation == C_BRACKET
				360	j = self.lastopenbracketpos
				361	str = self.str
				362	n = len(str)
				363	origi = i = string.rfind(str, '\n', 0, j) + 1
				364	j = j+1
				365	# find first list item
				366	while _not_itemre(str, j):
				367	# this line is junk; advance to the next line
				368	i = _find(str, '\n', j)
				369	if i < 0:
				370	break
				371	j = i = i+1
				372	if i < 0 or j >= n:
				373	# nothing interesting follows the bracket;
				374	# reproduce the bracket line's indentation + a level
				375	j = i = origi
				376	extra = self.indentwidth
				377	else:
				378	# the first list item begins on this line; line up with
				379	# the first interesting character
				380	extra = 0
				381	while str[j] in " \t":
				382	j = j+1
				383	return len(string.expandtabs(str[i:j],
				384	self.tabwidth)) + extra
				385
				386	# Return number of physical lines in last stmt (whether or not
				387	# it's an interesting stmt! this is intended to be called when
				388	# continuation is C_BACKSLASH).
				389
				390	def get_num_lines_in_stmt(self):
				391	self._study1()
				392	stmts = self.stmts
				393	return stmts[-1] - stmts[-2]
				394
				395	# Assuming continuation is C_BACKSLASH, return the number of spaces
				396	# the next line should be indented. Also assuming the new line is
				397	# the first one following the initial line of the stmt.
				398
				399	def compute_backslash_indent(self):
				400	self._study2()
				401	assert self.continuation == C_BACKSLASH
				402	str = self.str
				403	i = self.stmt_start
				404	while str[i] in " \t":
				405	i = i+1
				406	startpos = i
				407	endpos = string.find(str, '\n', startpos) + 1
				408	found = level = 0
				409	while i < endpos:
				410	ch = str[i]
				411	if ch in "([{":
				412	level = level + 1
				413	i = i+1
				414	elif ch in ")]}":
				415	if level:
				416	level = level - 1
				417	i = i+1
				418	elif ch == '"' or ch == "'":
				419	i = _match_stringre(str, i, endpos).end()
				420	elif ch == '#':
				421	break
				422	elif level == 0 and ch == '=' and \
				423	(i == 0 or str[i-1] not in "=<>!") and \
				424	str[i+1] != '=':
				425	found = 1
				426	break
				427	else:
				428	i = i+1
				429
				430	if found:
				431	# found a legit =, but it may be the last interesting
				432	# thing on the line
				433	i = i+1 # move beyond the =
				434	found = re.match(r"\s*\\", str[i:endpos]) is None
				435
				436	if not found:
				437	# oh well ... settle for moving beyond the first chunk
				438	# of non-whitespace chars
				439	i = startpos
				440	while str[i] not in " \t\n":
				441	i = i+1
				442
				443	return len(string.expandtabs(str[self.stmt_start :
				444	i],
				445	self.tabwidth)) + 1
				446
				447	# Return the leading whitespace on the initial line of the last
				448	# interesting stmt.
				449
				450	def get_base_indent_string(self):
				451	self._study2()
				452	i, n = self.stmt_start, self.stmt_end
				453	assert i is not None
				454	j = i
				455	str = self.str
				456	while j < n and str[j] in " \t":
				457	j = j + 1
				458	return str[i:j]
				459
				460	# Did the last interesting stmt open a block?
				461
				462	def is_block_opener(self):
				463	self._study2()
				464	return self.lastch == ':'
				465
				466	# Did the last interesting stmt close a block?
				467
				468	def is_block_closer(self):
				469	self._study2()
				470	return _closere(self.str, self.stmt_start) is not None